From 2d1445cf4d730a3f29320943bf0364763b528893 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 2 Dec 2019 12:15:46 -0800 Subject: [PATCH 01/67] fixed no of outputs --- ngraph_bridge/ngraph_encapsulate_op.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index a67f2084e..0bf21451d 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -413,6 +413,7 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { // ComputeUsingParallelExecutor //--------------------------------------------------------------------------- void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { + cout << "using parallel exec " << endl; // TF input tensors std::vector tf_input_tensors; @@ -484,7 +485,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { // create inputs, outputs, pipelineId int num_of_inputs = tensor_manager->GetNumberOfInputs(); - int num_of_outputs = tensor_manager->GetNumberOfInputs(); + int num_of_outputs = tensor_manager->GetNumberOfOutputs(); int current_iter_pipeline_depth = get<0>(io_tensors); vector> ng_inputs(num_of_inputs); vector> ng_outputs(num_of_outputs); @@ -497,6 +498,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { bool skip_tf2ng_copy = false; if (std::getenv(NGraphPrefetchSharedResouce::NGRAPH_TF_USE_PREFETCH) != nullptr) { + cout << "using prefetch env flag " << endl; NGraphPrefetchSharedResouce::InputTensorBundle prefetch_input_tensor_bundle{ current_iter_pipeline_depth, ng_inputs}; // Set the prefetch shared obj if applicable @@ -542,6 +544,8 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " "signal prefetching"; } else { + cout << "using prefetch inputs " << endl; + int prefetch_buffer_depth = 
shared_data->GetBufferDepth(); int skip_count = shared_data->GetSkipCount(); NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth From 36a0b6191343c260e37fdbaa1da3f772cc130e87 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 2 Dec 2019 14:54:21 -0800 Subject: [PATCH 02/67] Some minor changes --- ngraph_bridge/ngraph_encapsulate_op.cc | 37 +++++++++++--------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 0bf21451d..2f261377a 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -458,34 +458,29 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { // Get Tensor Manager and some error checking auto tensor_manager = m_parallel_executor->GetTensorManager(); - OP_REQUIRES(ctx, tensor_manager->GetNumberOfInputs() == ctx->num_inputs(), - errors::Internal("Num of inputs from TensorManager ", - tensor_manager->GetNumberOfInputs(), - " and Ctx->num_inputs() ", ctx->num_inputs(), - " do not match")); - OP_REQUIRES(ctx, - tensor_manager->GetNumberOfInputs() == tf_input_tensors.size(), + int num_of_inputs = tensor_manager->GetNumberOfInputs(); + int num_of_outputs = tensor_manager->GetNumberOfOutputs(); + OP_REQUIRES(ctx, num_of_inputs == ctx->num_inputs(), errors::Internal("Num of inputs from TensorManager ", - tensor_manager->GetNumberOfInputs(), - " and num of " - "input tensors from ctxt ", - tf_input_tensors.size(), " do not match")); + num_of_inputs, " and Ctx->num_inputs() ", + ctx->num_inputs(), " do not match")); + OP_REQUIRES( + ctx, num_of_inputs == tf_input_tensors.size(), + errors::Internal("Num of inputs from TensorManager ", num_of_inputs, + " and num of " + "input tensors from ctxt ", + tf_input_tensors.size(), " do not match")); - OP_REQUIRES(ctx, tensor_manager->GetNumberOfOutputs() == ctx->num_outputs(), + OP_REQUIRES(ctx, num_of_outputs == 
ctx->num_outputs(), errors::Internal("Num of outputs from TensorManager ", - tensor_manager->GetNumberOfOutputs(), - " and Ctx->num_outputs()", ctx->num_outputs(), - " do not match")); - OP_REQUIRES(ctx, tensor_manager->GetNumberOfOutputs() == - ng_exec->get_results().size(), + num_of_outputs, " and Ctx->num_outputs()", + ctx->num_outputs(), " do not match")); + OP_REQUIRES(ctx, num_of_outputs == ng_exec->get_results().size(), errors::Internal("Num of outputs from TensorManager ", - tensor_manager->GetNumberOfOutputs(), - "and number of exec outputs ", + num_of_outputs, "and number of exec outputs ", ng_exec->get_results().size(), " do not match")); // create inputs, outputs, pipelineId - int num_of_inputs = tensor_manager->GetNumberOfInputs(); - int num_of_outputs = tensor_manager->GetNumberOfOutputs(); int current_iter_pipeline_depth = get<0>(io_tensors); vector> ng_inputs(num_of_inputs); vector> ng_outputs(num_of_outputs); From e65d66fa66f92c0f428e35e49e224dc7e6092a70 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 2 Dec 2019 15:21:07 -0800 Subject: [PATCH 03/67] initialize only pipelined tensors --- ngraph_bridge/ngraph_encapsulate_op.cc | 6 ++++ ngraph_bridge/ngraph_executor.cc | 38 +++++++++++++------------- ngraph_bridge/ngraph_executor.h | 3 +- 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 2f261377a..251b11c0f 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -470,6 +470,12 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { " and num of " "input tensors from ctxt ", tf_input_tensors.size(), " do not match")); + OP_REQUIRES( + ctx, num_of_inputs == ng_exec->get_parameters().size(), + errors::Internal("Num of inputs from TensorManager ", num_of_inputs, + " and num of " + "parameters from exec ", + ng_exec->get_parameters().size(), " do not match")); OP_REQUIRES(ctx, 
num_of_outputs == ctx->num_outputs(), errors::Internal("Num of outputs from TensorManager ", diff --git a/ngraph_bridge/ngraph_executor.cc b/ngraph_bridge/ngraph_executor.cc index e75ff55cf..b24e04f07 100644 --- a/ngraph_bridge/ngraph_executor.cc +++ b/ngraph_bridge/ngraph_executor.cc @@ -318,7 +318,7 @@ NGraphExecutor::CreateCallback(const std::string signature, // Create PipelinedTensorStore if (status_ng_exec_pair.first == Status::OK()) { ng_exec = status_ng_exec_pair.second; - auto status_ng_pts_pair = InitializeIOTensorPipeline(ng_exec); + auto status_ng_pts_pair = InitializeIOTensorPipeline(ng_exec, m_tensor_manager->GetPipelinedInputIndexes(), m_tensor_manager->GetPipelinedOutputIndexes()); pts = status_ng_pts_pair.second; return std::make_pair(status_ng_pts_pair.first, std::make_tuple(ng_exec, serialized_ng_func, pts)); @@ -463,7 +463,9 @@ Status NGraphExecutor::ParseNodeAttributes( std::pair> NGraphExecutor::InitializeIOTensorPipeline( - std::shared_ptr ng_exec) { + std::shared_ptr ng_exec, + const vector& pipelined_input_indexes, + const vector& pipelined_output_indexes) { if (!m_executable_can_create_tensor) { return std::make_pair( errors::Internal( @@ -472,27 +474,25 @@ NGraphExecutor::InitializeIOTensorPipeline( nullptr); } // Create these pipelined ng tensors only if needed, else reuse from cache - size_t num_inputs = ng_exec->get_parameters().size(); - size_t num_outputs = ng_exec->get_results().size(); - - if (num_outputs == 0) { - return std::make_pair( - errors::Internal("Bad input/output length. Input size: ", num_inputs, - " Output size: ", num_outputs), - nullptr); - } + size_t num_pipelined_inputs = pipelined_input_indexes.size(); + size_t num_pipelined_outputs = pipelined_output_indexes.size(); // If the input or the output size if 0 then??? 
- NGRAPH_VLOG(5) << "InitializeIOTensorPipeline: In: " << num_inputs - << " Out: " << num_outputs; - PipelinedTensorMatrix pipelined_input_tensors(num_inputs); - PipelinedTensorMatrix pipelined_output_tensors(num_outputs); - for (size_t i = 0; i < num_inputs; i++) { - pipelined_input_tensors[i] = ng_exec->create_input_tensor(i, m_depth); + NGRAPH_VLOG(5) << "InitializeIOTensorPipeline: No. of Pipelined Inputs: " << num_inputs + << " No. of Pipelined Pipelined Outputs: " << num_outputs; + PipelinedTensorMatrix pipelined_input_tensors(num_pipelined_inputs); + PipelinedTensorMatrix pipelined_output_tensors(num_pipelined_outputs); + + for (size_t i = 0; i < num_pipelined_inputs; i++) { + int input_index = pipelined_input_indexes[i]; + pipelined_input_tensors[i] = ng_exec->create_input_tensor(input_index, m_depth); } - for (size_t i = 0; i < num_outputs; i++) { - pipelined_output_tensors[i] = ng_exec->create_output_tensor(i, m_depth); + + for (size_t i = 0; i < num_pipelined_outputs; i++) { + int output_index = pipelined_output_indexes[i]; + pipelined_output_tensors[i] = ng_exec->create_output_tensor(output_index, m_depth); } + shared_ptr pts(new PipelinedTensorsStore( pipelined_input_tensors, pipelined_output_tensors)); diff --git a/ngraph_bridge/ngraph_executor.h b/ngraph_bridge/ngraph_executor.h index d2c715b89..229791cfd 100644 --- a/ngraph_bridge/ngraph_executor.h +++ b/ngraph_bridge/ngraph_executor.h @@ -103,7 +103,8 @@ class NGraphExecutor { // Called from CreateCallback std::pair> InitializeIOTensorPipeline( - std::shared_ptr ng_exec); + std::shared_ptr ng_exec, const vector& pipelined_input_indexes, + const vector& pipelined_output_indexes); // Get tensorflow input tensors, input shapes, static_inputs to Compute // Signature From 71d0cad054a48c9d8982f1bb65533c8d12dce987 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 2 Dec 2019 15:21:07 -0800 Subject: [PATCH 04/67] initialize only pipelined tensors --- ngraph_bridge/ngraph_encapsulate_op.cc | 6 ++++ 
ngraph_bridge/ngraph_executor.cc | 43 ++++++++++++++------------ ngraph_bridge/ngraph_executor.h | 4 ++- 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 2f261377a..251b11c0f 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -470,6 +470,12 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { " and num of " "input tensors from ctxt ", tf_input_tensors.size(), " do not match")); + OP_REQUIRES( + ctx, num_of_inputs == ng_exec->get_parameters().size(), + errors::Internal("Num of inputs from TensorManager ", num_of_inputs, + " and num of " + "parameters from exec ", + ng_exec->get_parameters().size(), " do not match")); OP_REQUIRES(ctx, num_of_outputs == ctx->num_outputs(), errors::Internal("Num of outputs from TensorManager ", diff --git a/ngraph_bridge/ngraph_executor.cc b/ngraph_bridge/ngraph_executor.cc index e75ff55cf..c60f49209 100644 --- a/ngraph_bridge/ngraph_executor.cc +++ b/ngraph_bridge/ngraph_executor.cc @@ -318,7 +318,9 @@ NGraphExecutor::CreateCallback(const std::string signature, // Create PipelinedTensorStore if (status_ng_exec_pair.first == Status::OK()) { ng_exec = status_ng_exec_pair.second; - auto status_ng_pts_pair = InitializeIOTensorPipeline(ng_exec); + auto status_ng_pts_pair = InitializeIOTensorPipeline( + ng_exec, m_tensor_manager->GetPipelinedInputIndexes(), + m_tensor_manager->GetPipelinedOutputIndexes()); pts = status_ng_pts_pair.second; return std::make_pair(status_ng_pts_pair.first, std::make_tuple(ng_exec, serialized_ng_func, pts)); @@ -463,7 +465,9 @@ Status NGraphExecutor::ParseNodeAttributes( std::pair> NGraphExecutor::InitializeIOTensorPipeline( - std::shared_ptr ng_exec) { + std::shared_ptr ng_exec, + const vector& pipelined_input_indexes, + const vector& pipelined_output_indexes) { if (!m_executable_can_create_tensor) { return std::make_pair( 
errors::Internal( @@ -472,27 +476,28 @@ NGraphExecutor::InitializeIOTensorPipeline( nullptr); } // Create these pipelined ng tensors only if needed, else reuse from cache - size_t num_inputs = ng_exec->get_parameters().size(); - size_t num_outputs = ng_exec->get_results().size(); - - if (num_outputs == 0) { - return std::make_pair( - errors::Internal("Bad input/output length. Input size: ", num_inputs, - " Output size: ", num_outputs), - nullptr); - } + size_t num_pipelined_inputs = pipelined_input_indexes.size(); + size_t num_pipelined_outputs = pipelined_output_indexes.size(); // If the input or the output size if 0 then??? - NGRAPH_VLOG(5) << "InitializeIOTensorPipeline: In: " << num_inputs - << " Out: " << num_outputs; - PipelinedTensorMatrix pipelined_input_tensors(num_inputs); - PipelinedTensorMatrix pipelined_output_tensors(num_outputs); - for (size_t i = 0; i < num_inputs; i++) { - pipelined_input_tensors[i] = ng_exec->create_input_tensor(i, m_depth); + NGRAPH_VLOG(5) << "InitializeIOTensorPipeline: No. of Pipelined Inputs: " + << num_inputs + << " No. 
of Pipelined Pipelined Outputs: " << num_outputs; + PipelinedTensorMatrix pipelined_input_tensors(num_pipelined_inputs); + PipelinedTensorMatrix pipelined_output_tensors(num_pipelined_outputs); + + for (size_t i = 0; i < num_pipelined_inputs; i++) { + int input_index = pipelined_input_indexes[i]; + pipelined_input_tensors[i] = + ng_exec->create_input_tensor(input_index, m_depth); } - for (size_t i = 0; i < num_outputs; i++) { - pipelined_output_tensors[i] = ng_exec->create_output_tensor(i, m_depth); + + for (size_t i = 0; i < num_pipelined_outputs; i++) { + int output_index = pipelined_output_indexes[i]; + pipelined_output_tensors[i] = + ng_exec->create_output_tensor(output_index, m_depth); } + shared_ptr pts(new PipelinedTensorsStore( pipelined_input_tensors, pipelined_output_tensors)); diff --git a/ngraph_bridge/ngraph_executor.h b/ngraph_bridge/ngraph_executor.h index d2c715b89..a71851c2c 100644 --- a/ngraph_bridge/ngraph_executor.h +++ b/ngraph_bridge/ngraph_executor.h @@ -103,7 +103,9 @@ class NGraphExecutor { // Called from CreateCallback std::pair> InitializeIOTensorPipeline( - std::shared_ptr ng_exec); + std::shared_ptr ng_exec, + const vector& pipelined_input_indexes, + const vector& pipelined_output_indexes); // Get tensorflow input tensors, input shapes, static_inputs to Compute // Signature From 8e429a5bc5fd4ba17150244580b37e25477cf0b0 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 2 Dec 2019 17:19:00 -0800 Subject: [PATCH 05/67] GetPrefetchedTensors --- ngraph_bridge/ngraph_encapsulate_op.cc | 10 +++++++++- ngraph_bridge/ngraph_executor.cc | 5 +++-- ngraph_bridge/ngraph_tensor_manager.cc | 13 +++++++++++++ ngraph_bridge/ngraph_tensor_manager.h | 3 +++ test/test_ngraph_tensor_manager.cpp | 2 ++ 5 files changed, 30 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 251b11c0f..a881e0a96 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ 
b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -523,9 +523,15 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { std::tuple io_tensors_next_iter; io_tensors_next_iter = pipelined_tensor_store->get_tensors(); + + // Get prefetched inputs + pipelined_input_tensors_next_iter = get<1>(io_tensors_next_iter); + prefetched_input_tensors_next_iter = tensor_manager->GetPrefetchedTensors( + pipelined_input_tensors_next_iter); + // Save the ngTensors for the next iteration NGraphPrefetchSharedResouce::InputTensorBundle next_input_tensor_bundle{ - get<0>(io_tensors_next_iter), get<1>(io_tensors_next_iter)}; + get<0>(io_tensors_next_iter), prefetched_input_tensors_next_iter}; OP_REQUIRES(ctx, current_iter_pipeline_depth == (!next_input_tensor_bundle.Id), @@ -552,6 +558,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth << " skip count; " << skip_count; if (skip_count >= prefetch_buffer_depth) { + cout << "skip_tf2ng_copy true " << endl; // We have been using the pipelined tensors - therefore do the // following: // 1. Get the next set of IO tensors from the pipelined store @@ -586,6 +593,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { if (!skip_tf2ng_copy) { for (auto i = 0; i < tf_input_tensors.size(); i++) { + cout << "copying inputs true " << endl; ng::element::Type ng_element_type; OP_REQUIRES_OK(ctx, TFDataTypeToNGraphElementType( tf_input_tensors[i].dtype(), &ng_element_type)); diff --git a/ngraph_bridge/ngraph_executor.cc b/ngraph_bridge/ngraph_executor.cc index c60f49209..03153f80a 100644 --- a/ngraph_bridge/ngraph_executor.cc +++ b/ngraph_bridge/ngraph_executor.cc @@ -481,8 +481,9 @@ NGraphExecutor::InitializeIOTensorPipeline( // If the input or the output size if 0 then??? NGRAPH_VLOG(5) << "InitializeIOTensorPipeline: No. of Pipelined Inputs: " - << num_inputs - << " No. 
of Pipelined Pipelined Outputs: " << num_outputs; + << num_pipelined_inputs + << " No. of Pipelined Pipelined Outputs: " + << num_pipelined_outputs; PipelinedTensorMatrix pipelined_input_tensors(num_pipelined_inputs); PipelinedTensorMatrix pipelined_output_tensors(num_pipelined_outputs); diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index eb25252c3..f7757d4f5 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -95,5 +95,18 @@ void NGraphTensorManager::Initialize() { //--------------------------------------------------------------------------- NGraphTensorManager::~NGraphTensorManager() {} +//--------------------------------------------------------------------------- +// NGraphTensorManager::GetPrefetchedTensors +//--------------------------------------------------------------------------- +vector> GetPrefetchedTensors( + const vector>& pipelined_input_tensors) { + vector> prefetched_tensors; + auto prefetched_indexes = GetPipelinedInputIndexesThatArePrefetched(); + for (auto index : prefetched_indexes) { + prefetched_tensors.push_back(pipelined_input_tensors[index]); + } + return prefetched_tensors; +} + } // namespace ngraph_bridge } // namespace tensorflow \ No newline at end of file diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index eddf535c4..74351ee90 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -71,6 +71,9 @@ class NGraphTensorManager { return m_pipelined_input_indexes_prefetched; } + vector> GetPrefetchedTensors( + const vector>& pipelined_input_tensors); + private: void Initialize(); string m_ng_encap_node_name; diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index fdff539c2..a16db7f5f 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -326,6 +326,8 @@ TEST_F(NGraphTensorManagerTest, 
PrefetchNotInPipeline) { ClearCatalog(); } +TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors) {} + } // namespace testing } // namespace ngraph_bridge } // namespace tensorflow \ No newline at end of file From 2bf99a4577b1806c5bc103a25f45ba9957621f3c Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 2 Dec 2019 18:39:06 -0800 Subject: [PATCH 06/67] Added test --- ngraph_bridge/ngraph_encapsulate_op.cc | 9 +- ngraph_bridge/ngraph_tensor_manager.cc | 3 +- ngraph_bridge/ngraph_tensor_manager.h | 3 + test/test_ngraph_tensor_manager.cpp | 136 ++++++++++++++++++++++++- 4 files changed, 143 insertions(+), 8 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index a881e0a96..796336996 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -525,9 +525,12 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { io_tensors_next_iter = pipelined_tensor_store->get_tensors(); // Get prefetched inputs - pipelined_input_tensors_next_iter = get<1>(io_tensors_next_iter); - prefetched_input_tensors_next_iter = tensor_manager->GetPrefetchedTensors( - pipelined_input_tensors_next_iter); + vector> + pipelined_input_tensors_next_iter = get<1>(io_tensors_next_iter); + vector> + prefetched_input_tensors_next_iter = + tensor_manager->GetPrefetchedTensors( + pipelined_input_tensors_next_iter); // Save the ngTensors for the next iteration NGraphPrefetchSharedResouce::InputTensorBundle next_input_tensor_bundle{ diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index f7757d4f5..11daeb218 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -98,7 +98,8 @@ NGraphTensorManager::~NGraphTensorManager() {} //--------------------------------------------------------------------------- // NGraphTensorManager::GetPrefetchedTensors 
//--------------------------------------------------------------------------- -vector> GetPrefetchedTensors( +vector> +NGraphTensorManager::GetPrefetchedTensors( const vector>& pipelined_input_tensors) { vector> prefetched_tensors; auto prefetched_indexes = GetPipelinedInputIndexesThatArePrefetched(); diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index 74351ee90..09060dd32 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -24,7 +24,10 @@ #include "tensorflow/core/common_runtime/dma_helper.h" +#include "ngraph/ngraph.hpp" + using namespace std; +namespace ng = ngraph; namespace tensorflow { namespace ngraph_bridge { diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index a16db7f5f..8b81510f4 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -17,6 +17,8 @@ #include "tensorflow/core/common_runtime/dma_helper.h" +#include "ngraph/ngraph.hpp" + #include "ngraph_bridge/ngraph_catalog.h" #include "ngraph_bridge/ngraph_tensor_manager.h" #include "ngraph_bridge/ngraph_utils.h" @@ -24,6 +26,7 @@ #include "test/test_utilities.h" using namespace std; +namespace ng = ngraph; namespace tensorflow { @@ -79,12 +82,24 @@ class NGraphTensorManagerTest : public ::testing::Test { iota(vout.begin(), vout.end(), 0); return vout; } + + // Creates ngraph tensor + shared_ptr CreateNGraphScalarTensor( + int value, string backend_type = "INTERPRETER") { + // create scalar tensor + ng::Shape ng_shape_scalar({}); + + // create Backend + auto backend = ng::runtime::Backend::create(backend_type); + + auto temp = backend->create_tensor(ng::element::i32, ng_shape_scalar); + + temp->write(&value, sizeof(value)); + return temp; + } }; TEST(NGraphUtils, FindComplement1) { - bool yes; - Status st = IsNgraphTFLogTensorCopiesEnabled(0, yes); - vector input = {0, 3, 5, 8, 9}; vector complement = FindComplement(10, input); @@ -326,7 
+341,120 @@ TEST_F(NGraphTensorManagerTest, PrefetchNotInPipeline) { ClearCatalog(); } -TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors) {} +TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors) { + string ng_encap_node_name = "xyz_1"; + int ng_encap_cluster_id = 1; + int ng_encap_graph_id = 1; + int number_of_inputs = 5; + int number_of_outputs = 2; + + // expected + vector empty; + vector expected_pipelined_inp_indexes = FillRange(number_of_inputs); + vector expected_pipelined_out_indexes = FillRange(number_of_outputs); + vector expected_prefetched_inp_indexes = {1, 3}; + + EnterPrefetchInCatalog(ng_encap_graph_id, ng_encap_node_name, + expected_prefetched_inp_indexes); + + NGraphTensorManager tensor_manager(ng_encap_node_name, ng_encap_cluster_id, + ng_encap_graph_id, number_of_inputs, + number_of_outputs); + + // Allocate tensors for arguments a, b, c + vector> pipelined_input_tensors( + number_of_inputs); + + for (int i = 0; i < number_of_inputs; i++) { + pipelined_input_tensors[i] = CreateNGraphScalarTensor(i); + } + + vector> prefetched_input_tensors = + tensor_manager.GetPrefetchedTensors(pipelined_input_tensors); + ASSERT_EQ(prefetched_input_tensors.size(), + expected_prefetched_inp_indexes.size()); + + for (int i = 0; i < expected_prefetched_inp_indexes.size(); i++) { + int tensor_val = 0; + prefetched_input_tensors[i]->read(&tensor_val, sizeof(tensor_val)); + ASSERT_EQ(tensor_val, expected_prefetched_inp_indexes[i]); + } + + // clean up + ClearCatalog(); +} + +TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors2) { + string ng_encap_node_name = "xyz_1"; + int ng_encap_cluster_id = 1; + int ng_encap_graph_id = 1; + int number_of_inputs = 7; + int number_of_outputs = 4; + + // expected + vector expected_pipelined_inp_indexes, expected_pipelined_out_indexes, + expected_var_inp_indexes, expected_var_out_indexes, + expected_out_indexes_need_copy, expected_prefetched_inp_indexes, + expected_pipelined_inp_indexes_prefetched; + + if 
(ngraph_tf_are_variables_enabled()) { + // expected values + expected_pipelined_inp_indexes = {1, 3, 4, 6}; + expected_prefetched_inp_indexes = {3, 6}; + expected_pipelined_inp_indexes_prefetched = {1, 3}; + expected_pipelined_out_indexes = {0, 2}; + expected_var_inp_indexes = + FindComplement(number_of_inputs, expected_pipelined_inp_indexes); + expected_var_out_indexes = + FindComplement(number_of_outputs, expected_pipelined_out_indexes); + expected_out_indexes_need_copy = {2, 3}; + // enter in catalog + EnterVarInCatalog(ng_encap_graph_id, ng_encap_node_name, + expected_var_inp_indexes, expected_var_out_indexes, + expected_out_indexes_need_copy); + + } else { + expected_pipelined_inp_indexes = FillRange(number_of_inputs); + expected_pipelined_out_indexes = FillRange(number_of_outputs); + expected_prefetched_inp_indexes = {3, 6}; + expected_pipelined_inp_indexes_prefetched = { + 3, 6}; // all inputs are pipelined + + expected_var_inp_indexes = {}; + expected_var_out_indexes = {}; + expected_out_indexes_need_copy = {}; + } + + EnterPrefetchInCatalog(ng_encap_graph_id, ng_encap_node_name, + expected_prefetched_inp_indexes); + + NGraphTensorManager tensor_manager(ng_encap_node_name, ng_encap_cluster_id, + ng_encap_graph_id, number_of_inputs, + number_of_outputs); + + // Allocate tensors for arguments a, b, c + vector> pipelined_input_tensors( + expected_pipelined_inp_indexes.size()); + + for (int i = 0; i < pipelined_input_tensors.size(); i++) { + pipelined_input_tensors[i] = + CreateNGraphScalarTensor(expected_pipelined_inp_indexes[i]); + } + + vector> prefetched_input_tensors = + tensor_manager.GetPrefetchedTensors(pipelined_input_tensors); + ASSERT_EQ(prefetched_input_tensors.size(), + expected_prefetched_inp_indexes.size()); + + for (int i = 0; i < expected_prefetched_inp_indexes.size(); i++) { + int tensor_val = 0; + prefetched_input_tensors[i]->read(&tensor_val, sizeof(tensor_val)); + ASSERT_EQ(tensor_val, expected_prefetched_inp_indexes[i]); + } + + // clean 
up + ClearCatalog(); +} } // namespace testing } // namespace ngraph_bridge From ac9e32dea16a3c071048deeeaf146a3d423df770 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Tue, 3 Dec 2019 12:34:00 -0800 Subject: [PATCH 07/67] removed test --- test/test_ngraph_tensor_manager.cpp | 46 +---------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index 8b81510f4..e5d9056a2 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -341,50 +341,7 @@ TEST_F(NGraphTensorManagerTest, PrefetchNotInPipeline) { ClearCatalog(); } -TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors) { - string ng_encap_node_name = "xyz_1"; - int ng_encap_cluster_id = 1; - int ng_encap_graph_id = 1; - int number_of_inputs = 5; - int number_of_outputs = 2; - - // expected - vector empty; - vector expected_pipelined_inp_indexes = FillRange(number_of_inputs); - vector expected_pipelined_out_indexes = FillRange(number_of_outputs); - vector expected_prefetched_inp_indexes = {1, 3}; - - EnterPrefetchInCatalog(ng_encap_graph_id, ng_encap_node_name, - expected_prefetched_inp_indexes); - - NGraphTensorManager tensor_manager(ng_encap_node_name, ng_encap_cluster_id, - ng_encap_graph_id, number_of_inputs, - number_of_outputs); - - // Allocate tensors for arguments a, b, c - vector> pipelined_input_tensors( - number_of_inputs); - - for (int i = 0; i < number_of_inputs; i++) { - pipelined_input_tensors[i] = CreateNGraphScalarTensor(i); - } - - vector> prefetched_input_tensors = - tensor_manager.GetPrefetchedTensors(pipelined_input_tensors); - ASSERT_EQ(prefetched_input_tensors.size(), - expected_prefetched_inp_indexes.size()); - - for (int i = 0; i < expected_prefetched_inp_indexes.size(); i++) { - int tensor_val = 0; - prefetched_input_tensors[i]->read(&tensor_val, sizeof(tensor_val)); - ASSERT_EQ(tensor_val, expected_prefetched_inp_indexes[i]); - } - - // clean up - 
ClearCatalog(); -} - -TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors2) { +TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors1) { string ng_encap_node_name = "xyz_1"; int ng_encap_cluster_id = 1; int ng_encap_graph_id = 1; @@ -432,7 +389,6 @@ TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors2) { ng_encap_graph_id, number_of_inputs, number_of_outputs); - // Allocate tensors for arguments a, b, c vector> pipelined_input_tensors( expected_pipelined_inp_indexes.size()); From 2db801d7a75abd9869f194be3be33a11d3ca3b79 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Tue, 3 Dec 2019 17:47:07 -0800 Subject: [PATCH 08/67] refactor pipeline --- ngraph_bridge/CMakeLists.txt | 1 + ngraph_bridge/ngraph_deassign_clusters.cc | 1 + .../ngraph_encapsulate_get_prefetch.cc | 125 ++++++++++++++++++ .../ngraph_encapsulate_get_prefetch.h | 33 +++++ ngraph_bridge/ngraph_encapsulate_op.cc | 104 ++------------- 5 files changed, 169 insertions(+), 95 deletions(-) create mode 100644 ngraph_bridge/ngraph_encapsulate_get_prefetch.cc create mode 100644 ngraph_bridge/ngraph_encapsulate_get_prefetch.h diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt index 0ceb40bd9..4c1b7a3cb 100644 --- a/ngraph_bridge/CMakeLists.txt +++ b/ngraph_bridge/CMakeLists.txt @@ -60,6 +60,7 @@ set(SRC tf_deadness_analysis.cc prefetch_autotuner.cc ngraph_prefetch_dataset_op.cc + ngraph_encasulate_get_prefetch.cc stats_utils.cc version.cc ) diff --git a/ngraph_bridge/ngraph_deassign_clusters.cc b/ngraph_bridge/ngraph_deassign_clusters.cc index 2f51b3650..3d4a1bc4d 100644 --- a/ngraph_bridge/ngraph_deassign_clusters.cc +++ b/ngraph_bridge/ngraph_deassign_clusters.cc @@ -13,6 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*******************************************************************************/ + #include #include #include diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc new file mode 100644 index 000000000..c65fcb71a --- /dev/null +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc @@ -0,0 +1,125 @@ +/******************************************************************************* + * Copyright 2017-2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#include "ngraph_bridge/ngraph_backend_manager.h" + +using namespace std; +namespace ng = ngraph; + +namespace tensorflow { + +namespace ngraph_bridge { + +Status GetPrefetchTensors(Graph* graph) { + cout << "using prefetch env flag " << endl; + // Set the prefetch shared obj if applicable + NGraphPrefetchSharedResouce* shared_data = nullptr; + Status s = ctx->resource_manager()->Lookup( + NGraphPrefetchSharedResouce::CONTAINER_NAME, + NGraphPrefetchSharedResouce::RESOURCE_NAME, &shared_data); + + if (!s.ok()) { + // We are using this for the first time i.e., we need to do the following + // 1. Create the shared data object + // 2. We get another pipelined tensor pair for the current iteration and + // copy the TF tensor to this set and continue with the execution for + // for this iteration. 
+ shared_data = new NGraphPrefetchSharedResouce( + name(), m_parallel_executor->GetOpBackendName(), + m_parallel_executor->GetGraphId(), + m_parallel_executor->GetNgraphClusterId()); + + // Get the set of IO tensors for the next iteration + std::tuple + io_tensors_next_iter; + io_tensors_next_iter = pipelined_tensor_store->get_tensors(); + // Get prefetched inputs + vector> pipelined_input_tensors_next_iter = + get<1>(io_tensors_next_iter); + vector> prefetched_input_tensors_next_iter = + tensor_manager->GetPrefetchedTensors(pipelined_input_tensors_next_iter); + + // Save the prefetched input ngTensors for the next iteration + NGraphPrefetchSharedResouce::InputTensorBundle next_input_tensor_bundle{ + get<0>(io_tensors_next_iter), prefetched_input_tensors_next_iter}; + + OP_REQUIRES(ctx, + current_iter_pipeline_depth == (!next_input_tensor_bundle.Id), + errors::Internal("Current Pipeline Depth is ", + current_iter_pipeline_depth, + " and next iter pipeline depth is also ", + next_input_tensor_bundle.Id)); + + shared_data->AddNextInputTensorBundleForDeviceTransfer( + next_input_tensor_bundle); + + ctx->SetStatus(ctx->resource_manager()->Create( + NGraphPrefetchSharedResouce::CONTAINER_NAME, + NGraphPrefetchSharedResouce::RESOURCE_NAME, shared_data)); + // Continue the execution with the currently supplied TF tensor for the + // last time + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " + "signal prefetching"; + } else { + cout << "using prefetch inputs " << endl; + + int prefetch_buffer_depth = shared_data->GetBufferDepth(); + int skip_count = shared_data->GetSkipCount(); + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth + << " skip count; " << skip_count; + if (skip_count >= prefetch_buffer_depth) { + cout << "skip_tf2ng_copy true " << endl; + // We have been using the pipelined tensors - therefore do the + // following: + // 1. 
Save the prefetched Input tensors for the current iteration + // to the shared data object so that the prefetcher + // can continue with copying the next set of inout tensor to the + // device + // 3. Execute the nGraph call for this iteration using the + // nG prefeteched input tensors we got from the shared data + + // Add the current prefetched tensors for the next iteration + // Get prefetched inputs + vector> prefetched_input_tensors = + tensor_manager->GetPrefetchedTensors(ng_inputs); + NGraphPrefetchSharedResouce::InputTensorBundle + prefetch_input_tensor_bundle{current_iter_pipeline_depth, + prefetched_input_tensors}; + shared_data->AddNextInputTensorBundleForDeviceTransfer( + prefetch_input_tensor_bundle); + + // Update the input_tensors with the one ready for exdcution + auto ng_input_tensor_bundle_ready = + shared_data->GetNextInputTensorBundleReadyForDeviceExecution(); + current_iter_pipeline_depth = ng_input_tensor_bundle_ready.Id; + vector> ng_prefetched_inputs = + ng_input_tensor_bundle_ready.Inputs; + OP_REQUIRES(ctx, current_iter_pipeline_depth == + (!prefetch_input_tensor_bundle.Id), + errors::Internal("Current Pipeline Depth is ", + current_iter_pipeline_depth, + " and next iter pipeline depth is ", "also ", + prefetch_input_tensor_bundle.Id)); + skip_tf2ng_copy = true; + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Using device tensors"; + } + shared_data->IncrSkipCount(); + } +} +} + +} // namespace ngraph_bridge +} // namespace tensorflow diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.h b/ngraph_bridge/ngraph_encapsulate_get_prefetch.h new file mode 100644 index 000000000..9f5b459e3 --- /dev/null +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.h @@ -0,0 +1,33 @@ +/******************************************************************************* + * Copyright 2017-2019 Intel Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + *******************************************************************************/ + +#ifndef NGRAPH_TF_BRIDGE_GET_PREFETCH_H +#define NGRAPH_TF_BRIDGE_GET_PREFETCH_H + +#pragma once + +#include "tensorflow/core/graph/graph.h" + +namespace tensorflow { + +namespace ngraph_bridge { + +Status GetPrefetchTensors(Graph* graph); + +} // namespace ngraph_bridge +} // namespace tensorflow + +#endif // NGRAPH_TF_BRIDGE_GET_PREFETCH_H diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 796336996..cedf7ce96 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -40,6 +40,7 @@ #include "ngraph_bridge/ngraph_backend_manager.h" #include "ngraph_bridge/ngraph_builder.h" #include "ngraph_bridge/ngraph_cluster_manager.h" +#include "ngraph_bridge/ngraph_encapsulate_get_prefetch.h" #include "ngraph_bridge/ngraph_encapsulate_impl.h" #include "ngraph_bridge/ngraph_encapsulate_op.h" #include "ngraph_bridge/ngraph_freshness_tracker.h" @@ -451,11 +452,6 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { errors::Internal("Pipeline Depth is not 2, got ", m_parallel_executor->GetTensorPipelineDepth())); - std::tuple io_tensors; - io_tensors = pipelined_tensor_store->get_tensors(); - OP_REQUIRES(ctx, !(std::get<0>(io_tensors) < 0), - errors::Internal("No free tensor available")); - // Get Tensor Manager and some error checking auto tensor_manager = m_parallel_executor->GetTensorManager(); int num_of_inputs = tensor_manager->GetNumberOfInputs(); @@ -487,6 
+483,10 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { ng_exec->get_results().size(), " do not match")); // create inputs, outputs, pipelineId + std::tuple io_tensors; + io_tensors = pipelined_tensor_store->get_tensors(); + OP_REQUIRES(ctx, !(std::get<0>(io_tensors) < 0), + errors::Internal("No free tensor available")); int current_iter_pipeline_depth = get<0>(io_tensors); vector> ng_inputs(num_of_inputs); vector> ng_outputs(num_of_outputs); @@ -499,96 +499,10 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { bool skip_tf2ng_copy = false; if (std::getenv(NGraphPrefetchSharedResouce::NGRAPH_TF_USE_PREFETCH) != nullptr) { - cout << "using prefetch env flag " << endl; - NGraphPrefetchSharedResouce::InputTensorBundle prefetch_input_tensor_bundle{ - current_iter_pipeline_depth, ng_inputs}; - // Set the prefetch shared obj if applicable - NGraphPrefetchSharedResouce* shared_data = nullptr; - Status s = ctx->resource_manager()->Lookup( - NGraphPrefetchSharedResouce::CONTAINER_NAME, - NGraphPrefetchSharedResouce::RESOURCE_NAME, &shared_data); - - if (!s.ok()) { - // We are using this for the first time i.e., we need to do the following - // 1. Create the shared data object - // 2. save the input/output nG tensor set to the shared data object - // 3. Get another pipelined tensor pair for the current iteration and - // copy the TF tensor to this set and continue with the execution for - // for this iteration. 
- shared_data = new NGraphPrefetchSharedResouce( - name(), m_parallel_executor->GetOpBackendName(), - m_parallel_executor->GetGraphId(), - m_parallel_executor->GetNgraphClusterId()); - // Get the set of IO tensors for the next iteration - std::tuple - io_tensors_next_iter; - io_tensors_next_iter = pipelined_tensor_store->get_tensors(); - - // Get prefetched inputs - vector> - pipelined_input_tensors_next_iter = get<1>(io_tensors_next_iter); - vector> - prefetched_input_tensors_next_iter = - tensor_manager->GetPrefetchedTensors( - pipelined_input_tensors_next_iter); - - // Save the ngTensors for the next iteration - NGraphPrefetchSharedResouce::InputTensorBundle next_input_tensor_bundle{ - get<0>(io_tensors_next_iter), prefetched_input_tensors_next_iter}; - - OP_REQUIRES(ctx, - current_iter_pipeline_depth == (!next_input_tensor_bundle.Id), - errors::Internal("Current Pipeline Depth is ", - current_iter_pipeline_depth, - " and next iter pipeline depth is also ", - next_input_tensor_bundle.Id)); - - shared_data->AddNextInputTensorBundleForDeviceTransfer( - next_input_tensor_bundle); - - ctx->SetStatus(ctx->resource_manager()->Create( - NGraphPrefetchSharedResouce::CONTAINER_NAME, - NGraphPrefetchSharedResouce::RESOURCE_NAME, shared_data)); - // Continue the execution with the currently supplied TF tensor for the - // last time - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " - "signal prefetching"; - } else { - cout << "using prefetch inputs " << endl; - - int prefetch_buffer_depth = shared_data->GetBufferDepth(); - int skip_count = shared_data->GetSkipCount(); - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth - << " skip count; " << skip_count; - if (skip_count >= prefetch_buffer_depth) { - cout << "skip_tf2ng_copy true " << endl; - // We have been using the pipelined tensors - therefore do the - // following: - // 1. Get the next set of IO tensors from the pipelined store - // 2. 
Save that to the shared data object so that the prefetcher - // can continue with copying the next set of inout tensor to the - // device - // 3. Execute the nGraph call for this iteration using the - // nG tensors we got from the shared data - auto ng_input_tensor_bundle_ready = - shared_data->GetNextInputTensorBundleReadyForDeviceExecution(); - // Add the next set of tensors for the next iteration - shared_data->AddNextInputTensorBundleForDeviceTransfer( - prefetch_input_tensor_bundle); - // Update the input_tensors with the one ready for exdcution - current_iter_pipeline_depth = ng_input_tensor_bundle_ready.Id; - ng_inputs = ng_input_tensor_bundle_ready.Inputs; - OP_REQUIRES(ctx, current_iter_pipeline_depth == - (!prefetch_input_tensor_bundle.Id), - errors::Internal("Current Pipeline Depth is ", - current_iter_pipeline_depth, - " and next iter pipeline depth is ", - "also ", prefetch_input_tensor_bundle.Id)); - skip_tf2ng_copy = true; - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Using device tensors"; - } - shared_data->IncrSkipCount(); - } + Status s = GetPrefetchTensors(&skip_tf2ng_copy); + OP_REQUIRES( + ctx, s.ok(), + errors::Internal("Error encountered when prefetching tensors: ")); } // Allocate the input/ From 9c0e03f207f0c26ae76fccf6df8d3e4dcf2e1ae5 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 4 Dec 2019 11:20:26 -0800 Subject: [PATCH 09/67] Shared data keeps track of prefetched input indexes --- ngraph_bridge/ngraph_prefetch_dataset_op.cc | 22 +++++++++++++++++---- ngraph_bridge/ngraph_prefetch_shared_data.h | 12 ++++++++--- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/ngraph_bridge/ngraph_prefetch_dataset_op.cc b/ngraph_bridge/ngraph_prefetch_dataset_op.cc index 7ff974a1a..18b444611 100644 --- a/ngraph_bridge/ngraph_prefetch_dataset_op.cc +++ b/ngraph_bridge/ngraph_prefetch_dataset_op.cc @@ -417,8 +417,21 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { if (s.ok()) { ngraph::Event evt_dev_cp("Prf Dev Copy", 
"Copy", ""); shared_data->SetBufferDepth(m_buffer_size); + auto ng_input_tensor_bundle = shared_data->GetNextInputTensorBundleForDeviceTransfer(); + auto ng_prefetch_input_indexes = + shared_data->GetPrefetchInputIndexes(); + + int number_of_buffer_elements = buffer_element.value.size(); + if (number_of_buffer_elements != ng_prefetch_input_indexes.size()) { + throw std::runtime_error( + "Prefetch buffer elements size " + + to_string(number_of_buffer_elements) + + " does not match the number of prefetch inputs expected by " + "encap " + + to_string(ng_prefetch_input_indexes.size())); + } // Write to these tensors for (auto i = 0; i < buffer_element.value.size(); i++) { @@ -432,10 +445,11 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { NGRAPH_VLOG(2) << "[PREFETCH] INPUT tensor being written by Prefetch: " << " Value: " << buffer_element.value[i].DebugString(); - ng_input_tensor_bundle.Inputs[i]->write( - current_src_ptr, - ng_input_tensor_bundle.Inputs[i]->get_element_count() * - ng_element_type.size()); + int input_index = ng_prefetch_input_indexes[i]; + ng_input_tensor_bundle.Inputs[input_index]->write( + current_src_ptr, ng_input_tensor_bundle.Inputs[input_index] + ->get_element_count() * + ng_element_type.size()); } catch (const std::exception& exp) { throw exp; } catch (...) 
{ diff --git a/ngraph_bridge/ngraph_prefetch_shared_data.h b/ngraph_bridge/ngraph_prefetch_shared_data.h index 6f140c56c..1fe9054c5 100644 --- a/ngraph_bridge/ngraph_prefetch_shared_data.h +++ b/ngraph_bridge/ngraph_prefetch_shared_data.h @@ -40,11 +40,13 @@ class NGraphPrefetchSharedResouce : public ResourceBase { public: explicit NGraphPrefetchSharedResouce(const std::string& ng_enc_op_name, const std::string& backend_name, - int cluster_id, int graph_id) + int cluster_id, int graph_id, + const vector prefetch_input_indexes) : m_ng_enc_op_name(ng_enc_op_name), m_backend_name(backend_name), m_graph_id(graph_id), - m_cluster_id(cluster_id) {} + m_cluster_id(cluster_id), + m_prefetch_input_indexes(prefetch_input_indexes) {} // Returns a debug string for *this. string DebugString() const override { return "NGraphPrefetchSharedResouce"; } @@ -115,12 +117,16 @@ class NGraphPrefetchSharedResouce : public ResourceBase { void IncrSkipCount() { m_skip_count++; } int GetSkipCount() { return m_skip_count; } + const vector& GetPrefetchInputIndexes() { + return m_prefetch_input_indexes; + } + private: const std::string m_ng_enc_op_name; const std::string m_backend_name; const int m_graph_id; const int m_cluster_id; - + const vector m_prefetch_input_indexes; // We need to maintain two queues as follows: // ----------+------------+------------+------------------------------------+ // Queue | Writer | Reader | Comments | From 8dfc7953bc778a2c54532562729f9b5a310f2d2a Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 4 Dec 2019 13:12:06 -0800 Subject: [PATCH 10/67] Working state --- ngraph_bridge/CMakeLists.txt | 2 +- ngraph_bridge/ngraph_catalog.cc | 4 + .../ngraph_encapsulate_get_prefetch.cc | 101 +---------------- .../ngraph_encapsulate_get_prefetch.h | 2 +- ngraph_bridge/ngraph_encapsulate_op.cc | 107 ++++++++++++++++-- .../ngraph_enter_prefetch_in_catalog.cc | 4 + ngraph_bridge/ngraph_executor.cc | 5 +- ngraph_bridge/ngraph_executor.h | 3 +- 
ngraph_bridge/ngraph_tensor_manager.cc | 8 ++ test/python/test_api.py | 4 - test/test_parallel_executor.cpp | 27 +++-- 11 files changed, 140 insertions(+), 127 deletions(-) diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt index 4c1b7a3cb..f48aeef56 100644 --- a/ngraph_bridge/CMakeLists.txt +++ b/ngraph_bridge/CMakeLists.txt @@ -60,7 +60,7 @@ set(SRC tf_deadness_analysis.cc prefetch_autotuner.cc ngraph_prefetch_dataset_op.cc - ngraph_encasulate_get_prefetch.cc + ngraph_encapsulate_get_prefetch.cc stats_utils.cc version.cc ) diff --git a/ngraph_bridge/ngraph_catalog.cc b/ngraph_bridge/ngraph_catalog.cc index 95b65f506..fdb70790d 100644 --- a/ngraph_bridge/ngraph_catalog.cc +++ b/ngraph_bridge/ngraph_catalog.cc @@ -220,17 +220,21 @@ void NGraphCatalog::AddToPrefetchedInputIndexMap( throw runtime_error("Trying to add an already existing key ( " + key + " ) in PrefetchedInputIndexMap "); } + cout << " AddToPrefetchedInputIndexMap key " << key << endl; NGraphCatalog::prefetched_input_index_map_.insert({key, val}); } bool NGraphCatalog::ExistsInPrefetchedInputIndexMap(const int& graphid, const string& node_name) { string key = NGraphCatalog::CreateNodeKey(graphid, node_name); + cout << " ExistsInPrefetchedInputIndexMap key " << key << endl; return NGraphCatalog::ExistsInPrefetchedInputIndexMap(key); } bool NGraphCatalog::ExistsInPrefetchedInputIndexMap(const string& key) { auto itr = NGraphCatalog::prefetched_input_index_map_.find(key); + cout << " ExistsInPrefetchedInputIndexMap check " + << (itr != NGraphCatalog::prefetched_input_index_map_.end()) << endl; return itr != NGraphCatalog::prefetched_input_index_map_.end(); } diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc index c65fcb71a..b3c70c227 100644 --- a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc @@ -14,112 +14,15 @@ * limitations under the License. 
*******************************************************************************/ -#include "ngraph_bridge/ngraph_backend_manager.h" +#include "ngraph_bridge/ngraph_encapsulate_get_prefetch.h" using namespace std; -namespace ng = ngraph; namespace tensorflow { namespace ngraph_bridge { -Status GetPrefetchTensors(Graph* graph) { - cout << "using prefetch env flag " << endl; - // Set the prefetch shared obj if applicable - NGraphPrefetchSharedResouce* shared_data = nullptr; - Status s = ctx->resource_manager()->Lookup( - NGraphPrefetchSharedResouce::CONTAINER_NAME, - NGraphPrefetchSharedResouce::RESOURCE_NAME, &shared_data); - - if (!s.ok()) { - // We are using this for the first time i.e., we need to do the following - // 1. Create the shared data object - // 2. We get another pipelined tensor pair for the current iteration and - // copy the TF tensor to this set and continue with the execution for - // for this iteration. - shared_data = new NGraphPrefetchSharedResouce( - name(), m_parallel_executor->GetOpBackendName(), - m_parallel_executor->GetGraphId(), - m_parallel_executor->GetNgraphClusterId()); - - // Get the set of IO tensors for the next iteration - std::tuple - io_tensors_next_iter; - io_tensors_next_iter = pipelined_tensor_store->get_tensors(); - // Get prefetched inputs - vector> pipelined_input_tensors_next_iter = - get<1>(io_tensors_next_iter); - vector> prefetched_input_tensors_next_iter = - tensor_manager->GetPrefetchedTensors(pipelined_input_tensors_next_iter); - - // Save the prefetched input ngTensors for the next iteration - NGraphPrefetchSharedResouce::InputTensorBundle next_input_tensor_bundle{ - get<0>(io_tensors_next_iter), prefetched_input_tensors_next_iter}; - - OP_REQUIRES(ctx, - current_iter_pipeline_depth == (!next_input_tensor_bundle.Id), - errors::Internal("Current Pipeline Depth is ", - current_iter_pipeline_depth, - " and next iter pipeline depth is also ", - next_input_tensor_bundle.Id)); - - 
shared_data->AddNextInputTensorBundleForDeviceTransfer( - next_input_tensor_bundle); - - ctx->SetStatus(ctx->resource_manager()->Create( - NGraphPrefetchSharedResouce::CONTAINER_NAME, - NGraphPrefetchSharedResouce::RESOURCE_NAME, shared_data)); - // Continue the execution with the currently supplied TF tensor for the - // last time - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " - "signal prefetching"; - } else { - cout << "using prefetch inputs " << endl; - - int prefetch_buffer_depth = shared_data->GetBufferDepth(); - int skip_count = shared_data->GetSkipCount(); - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth - << " skip count; " << skip_count; - if (skip_count >= prefetch_buffer_depth) { - cout << "skip_tf2ng_copy true " << endl; - // We have been using the pipelined tensors - therefore do the - // following: - // 1. Save the prefetched Input tensors for the current iteration - // to the shared data object so that the prefetcher - // can continue with copying the next set of inout tensor to the - // device - // 3. 
Execute the nGraph call for this iteration using the - // nG prefeteched input tensors we got from the shared data - - // Add the current prefetched tensors for the next iteration - // Get prefetched inputs - vector> prefetched_input_tensors = - tensor_manager->GetPrefetchedTensors(ng_inputs); - NGraphPrefetchSharedResouce::InputTensorBundle - prefetch_input_tensor_bundle{current_iter_pipeline_depth, - prefetched_input_tensors}; - shared_data->AddNextInputTensorBundleForDeviceTransfer( - prefetch_input_tensor_bundle); - - // Update the input_tensors with the one ready for exdcution - auto ng_input_tensor_bundle_ready = - shared_data->GetNextInputTensorBundleReadyForDeviceExecution(); - current_iter_pipeline_depth = ng_input_tensor_bundle_ready.Id; - vector> ng_prefetched_inputs = - ng_input_tensor_bundle_ready.Inputs; - OP_REQUIRES(ctx, current_iter_pipeline_depth == - (!prefetch_input_tensor_bundle.Id), - errors::Internal("Current Pipeline Depth is ", - current_iter_pipeline_depth, - " and next iter pipeline depth is ", "also ", - prefetch_input_tensor_bundle.Id)); - skip_tf2ng_copy = true; - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Using device tensors"; - } - shared_data->IncrSkipCount(); - } -} -} +Status GetPrefetchTensors() { return Status::OK(); } } // namespace ngraph_bridge } // namespace tensorflow diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.h b/ngraph_bridge/ngraph_encapsulate_get_prefetch.h index 9f5b459e3..cc09f3acf 100644 --- a/ngraph_bridge/ngraph_encapsulate_get_prefetch.h +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.h @@ -25,7 +25,7 @@ namespace tensorflow { namespace ngraph_bridge { -Status GetPrefetchTensors(Graph* graph); +Status GetPrefetchTensors(); } // namespace ngraph_bridge } // namespace tensorflow diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index cedf7ce96..500443255 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ 
-121,8 +121,9 @@ void NGraphEncapsulateOp::CreateParallelExecutor(OpKernelConstruction* ctx, OP_REQUIRES_OK(ctx, ctx->GetAttr("ngraph_cluster", &cluster_id)); graph_def = NGraphClusterManager::GetClusterGraph(cluster_id); + string node_name = name(); if (graph_def == nullptr) { - string flib_key = "ngraph_cluster_" + to_string(cluster_id); + string flib_key = node_name; // Read graphdef from function library const FunctionLibraryDefinition flib = *ctx->function_library()->GetFunctionLibraryDefinition(); @@ -158,9 +159,9 @@ void NGraphEncapsulateOp::CreateParallelExecutor(OpKernelConstruction* ctx, } // Create the Executor object - m_parallel_executor = move(unique_ptr( - new NGraphExecutor(s_instance_id, cluster_id, graph_id, encap_subgraph, - backend_name, my_function_cache_depth_in_items))); + m_parallel_executor = move(unique_ptr(new NGraphExecutor( + s_instance_id, cluster_id, graph_id, encap_subgraph, backend_name, + my_function_cache_depth_in_items, node_name))); auto tensor_manager = m_parallel_executor->GetTensorManager(); OP_REQUIRES(ctx, tensor_manager->GetNumberOfInputs() == ctx->num_inputs(), @@ -499,10 +500,100 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { bool skip_tf2ng_copy = false; if (std::getenv(NGraphPrefetchSharedResouce::NGRAPH_TF_USE_PREFETCH) != nullptr) { - Status s = GetPrefetchTensors(&skip_tf2ng_copy); - OP_REQUIRES( - ctx, s.ok(), - errors::Internal("Error encountered when prefetching tensors: ")); + cout << "using prefetch env flag " << endl; + // Set the prefetch shared obj if applicable + NGraphPrefetchSharedResouce* shared_data = nullptr; + Status s = ctx->resource_manager()->Lookup( + NGraphPrefetchSharedResouce::CONTAINER_NAME, + NGraphPrefetchSharedResouce::RESOURCE_NAME, &shared_data); + + if (!s.ok()) { + // We are using this for the first time i.e., we need to do the following + // 1. Create the shared data object + // 2. 
We get another pipelined tensor pair for the current iteration and + // copy the TF tensor to this set and continue with the execution for + // for this iteration. + auto ng_prefetch_input_indexes = + tensor_manager->GetPipelinedInputIndexesThatArePrefetched(); + cout << "ng_prefetch_input_indexes " << ng_prefetch_input_indexes.size() + << endl; + + for (auto inp : ng_prefetch_input_indexes) { + cout << " inp indez " << inp << endl; + } + shared_data = new NGraphPrefetchSharedResouce( + name(), m_parallel_executor->GetOpBackendName(), + m_parallel_executor->GetGraphId(), + m_parallel_executor->GetNgraphClusterId(), ng_prefetch_input_indexes); + + // Get the set of IO tensors for the next iteration + std::tuple + io_tensors_next_iter; + io_tensors_next_iter = pipelined_tensor_store->get_tensors(); + + // Save the prefetched input ngTensors for the next iteration + NGraphPrefetchSharedResouce::InputTensorBundle next_input_tensor_bundle{ + get<0>(io_tensors_next_iter), get<1>(io_tensors_next_iter)}; + + OP_REQUIRES(ctx, + current_iter_pipeline_depth == (!next_input_tensor_bundle.Id), + errors::Internal("Current Pipeline Depth is ", + current_iter_pipeline_depth, + " and next iter pipeline depth is also ", + next_input_tensor_bundle.Id)); + + shared_data->AddNextInputTensorBundleForDeviceTransfer( + next_input_tensor_bundle); + + ctx->SetStatus(ctx->resource_manager()->Create( + NGraphPrefetchSharedResouce::CONTAINER_NAME, + NGraphPrefetchSharedResouce::RESOURCE_NAME, shared_data)); + // Continue the execution with the currently supplied TF tensor for the + // last time + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " + "signal prefetching"; + } else { + cout << "using prefetch inputs " << endl; + + int prefetch_buffer_depth = shared_data->GetBufferDepth(); + int skip_count = shared_data->GetSkipCount(); + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth + << " skip count; " << skip_count; + if (skip_count >= 
prefetch_buffer_depth) { + cout << "skip_tf2ng_copy true " << endl; + // We have been using the pipelined tensors - therefore do the + // following: + // 1. Save the prefetched Input tensors for the current iteration + // to the shared data object so that the prefetcher + // can continue with copying the next set of inout tensor to the + // device + // 3. Execute the nGraph call for this iteration using the + // nG prefeteched input tensors we got from the shared data + + // Add the current prefetched tensors for the next iteration + // Get prefetched inputs + NGraphPrefetchSharedResouce::InputTensorBundle + prefetch_input_tensor_bundle{current_iter_pipeline_depth, + ng_inputs}; + shared_data->AddNextInputTensorBundleForDeviceTransfer( + prefetch_input_tensor_bundle); + + // Update the input_tensors with the one ready for exdcution + auto ng_input_tensor_bundle_ready = + shared_data->GetNextInputTensorBundleReadyForDeviceExecution(); + current_iter_pipeline_depth = ng_input_tensor_bundle_ready.Id; + ng_inputs = ng_input_tensor_bundle_ready.Inputs; + OP_REQUIRES(ctx, current_iter_pipeline_depth == + (!prefetch_input_tensor_bundle.Id), + errors::Internal("Current Pipeline Depth is ", + current_iter_pipeline_depth, + " and next iter pipeline depth is ", + "also ", prefetch_input_tensor_bundle.Id)); + skip_tf2ng_copy = true; + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Using device tensors"; + } + shared_data->IncrSkipCount(); + } } // Allocate the input/ diff --git a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc index 356e907f7..84513087f 100644 --- a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc +++ b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc @@ -62,6 +62,10 @@ Status EnterPrefetchInCatalog(Graph* graph, int graph_id) { } // end loop over input edges if (in_indexes_for_encap.size() > 0) { + for (auto i : in_indexes_for_encap) { + cout << "Enter Prefetch in catalog " << i << endl; + } + try { 
NGraphCatalog::AddToPrefetchedInputIndexMap(graph_id, node->name(), in_indexes_for_encap); diff --git a/ngraph_bridge/ngraph_executor.cc b/ngraph_bridge/ngraph_executor.cc index 03153f80a..be076ec61 100644 --- a/ngraph_bridge/ngraph_executor.cc +++ b/ngraph_bridge/ngraph_executor.cc @@ -62,13 +62,14 @@ namespace ngraph_bridge { NGraphExecutor::NGraphExecutor(int instance_id, int cluster_id, int graph_id, unique_ptr& graph, const string& backend_name, - const int cache_depth) + const int cache_depth, const string& node_name) : m_instance_id(instance_id), m_ngraph_cluster_id(cluster_id), m_graph_id(graph_id), m_graph(std::move(graph)), m_op_backend_name(backend_name), - m_ng_data_cache(cache_depth) { + m_ng_data_cache(cache_depth), + m_node_name(node_name) { // Sanity checks if (m_graph == nullptr) { throw std::runtime_error("Graph is nullptr!"); diff --git a/ngraph_bridge/ngraph_executor.h b/ngraph_bridge/ngraph_executor.h index a71851c2c..e5e554b85 100644 --- a/ngraph_bridge/ngraph_executor.h +++ b/ngraph_bridge/ngraph_executor.h @@ -42,7 +42,8 @@ class NGraphExecutor { // Transforms, compiles and executes TesnorFlow computation graph using nGraph explicit NGraphExecutor(int instance_id, int cluster_id, int graph_id, unique_ptr& graph, - const string& backend_name, const int cache_depth); + const string& backend_name, const int cache_depth, + const string& node_name); ~NGraphExecutor(); diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 11daeb218..9576a7732 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -64,16 +64,24 @@ void NGraphTensorManager::Initialize() { m_pipelined_output_indexes = FindComplement(m_number_of_outputs, m_output_indexes_assigning_variable); + cout << "TM m_ng_encap_graph_id " << m_ng_encap_graph_id << endl; + cout << "TM m_ng_encap_node_name " << m_ng_encap_node_name << endl; if 
(NGraphCatalog::ExistsInPrefetchedInputIndexMap(m_ng_encap_graph_id, m_ng_encap_node_name)) { auto prefetch_indexes = NGraphCatalog::GetIndexesFromPrefetchedInputIndexMap( m_ng_encap_graph_id, m_ng_encap_node_name); + for (auto i : m_prefetched_input_indexes) { + cout << "TM " << i << endl; + } m_prefetched_input_indexes.insert(m_prefetched_input_indexes.begin(), prefetch_indexes.begin(), prefetch_indexes.end()); // keeping the indexes sorted, is helpful in general testing sort(m_prefetched_input_indexes.begin(), m_prefetched_input_indexes.end()); + for (auto i : m_prefetched_input_indexes) { + cout << "TM " << i << endl; + } } // the prefetched input indexes will also be pipelined diff --git a/test/python/test_api.py b/test/python/test_api.py index 7859bb1b9..0bb347ddc 100644 --- a/test/python/test_api.py +++ b/test/python/test_api.py @@ -38,10 +38,6 @@ def test_enable(self): def test_backends_len(self): assert ngraph_bridge.backends_len() - def test_set_backend(self): - ngraph_bridge.set_backend('CPU') - assert ngraph_bridge.get_currently_set_backend_name() == "CPU" - def test_set_backend_invalid(self): try: ngraph_bridge.set_backend('POTATO') diff --git a/test/test_parallel_executor.cpp b/test/test_parallel_executor.cpp index e73b39ee6..39c33c58f 100644 --- a/test/test_parallel_executor.cpp +++ b/test/test_parallel_executor.cpp @@ -60,13 +60,13 @@ TEST(ParallelExecutor, Construction) { // First test with a backend not yet created unique_ptr executor; - ASSERT_THROW(executor = unique_ptr( - new NGraphExecutor(100, 500, 600, input_graph, "bogus", 5)), + ASSERT_THROW(executor = unique_ptr(new NGraphExecutor( + 100, 500, 600, input_graph, "bogus", 5, "xyz_5")), std::runtime_error); // Next test with a null graph not yet created - ASSERT_THROW(executor = unique_ptr( - new NGraphExecutor(100, 500, 600, input_graph, "bogus", 12)), + ASSERT_THROW(executor = unique_ptr(new NGraphExecutor( + 100, 500, 600, input_graph, "bogus", 12, "xyz_12")), std::runtime_error); // Now 
read the graph @@ -74,8 +74,9 @@ TEST(ParallelExecutor, Construction) { // Next test with a backend after creating tf::ngraph_bridge::BackendManager::CreateBackend("INTERPRETER"); - ASSERT_NO_THROW(executor = unique_ptr(new NGraphExecutor( - 100, 500, 600, input_graph, "INTERPRETER", 16))); + ASSERT_NO_THROW( + executor = unique_ptr(new NGraphExecutor( + 100, 500, 600, input_graph, "INTERPRETER", 16, "xyz_10"))); // Now that the object has been cobstructed, test various internal parts // TODO: Create a Test Class and mark that as a friend of the Executor class @@ -93,7 +94,8 @@ TEST(ParallelExecutor, CompilerTest) { ASSERT_OK(LoadGraphFromPbTxt("test_axpy_launchop.pbtxt", input_graph)); tf::ngraph_bridge::BackendManager::CreateBackend("INTERPRETER"); - NGraphExecutor executor(100, 500, 600, input_graph, "INTERPRETER", 10); + NGraphExecutor executor(100, 500, 600, input_graph, "INTERPRETER", 10, + "xyz_10"); // Create the inputs for this graph Tensor x(DT_FLOAT, TensorShape({2, 3})); @@ -132,7 +134,8 @@ TEST(ParallelExecutor, ExecuteOnSingleThread) { unique_ptr input_graph; ASSERT_OK(LoadGraphFromPbTxt("test_axpy_launchop.pbtxt", input_graph)); tf::ngraph_bridge::BackendManager::CreateBackend("INTERPRETER"); - NGraphExecutor executor(100, 500, 600, input_graph, "INTERPRETER", 12); + NGraphExecutor executor(100, 500, 600, input_graph, "INTERPRETER", 12, + "xyz_12"); // Create the inputs for this graph Tensor x(DT_FLOAT, TensorShape({2, 3})); @@ -214,7 +217,7 @@ TEST(ParallelExecutor, ExecuteOnSingleThread8Bit) { } tf::ngraph_bridge::BackendManager::CreateBackend(backend_name); - NGraphExecutor executor(100, 500, 600, input_graph, backend_name, 5); + NGraphExecutor executor(100, 500, 600, input_graph, backend_name, 5, "xyz_5"); // Create the inputs for this graph Tensor x(DT_INT8, TensorShape({2, 2})); @@ -296,7 +299,8 @@ TEST(ParallelExecutor, ExecuteOnMultipleThreads8Bit) { } tf::ngraph_bridge::BackendManager::CreateBackend(backend_name); - NGraphExecutor 
executor(100, 500, 600, input_graph, backend_name, 16); + NGraphExecutor executor(100, 500, 600, input_graph, backend_name, 16, + "xyz_16"); // Create the inputs for this graph Tensor x(DT_INT8, TensorShape({2, 2})); @@ -383,7 +387,8 @@ TEST(ParallelExecutor, ExecuteOnMultipleThreads) { unique_ptr input_graph; ASSERT_OK(LoadGraphFromPbTxt("test_axpy_launchop.pbtxt", input_graph)); tf::ngraph_bridge::BackendManager::CreateBackend("INTERPRETER"); - NGraphExecutor executor(100, 500, 600, input_graph, "INTERPRETER", 16); + NGraphExecutor executor(100, 500, 600, input_graph, "INTERPRETER", 16, + "xyz_16"); // Create the inputs for this graph Tensor x(DT_FLOAT, TensorShape({2, 3})); From 5464b7a42f6eee72d9be9013e4908285872de91c Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 4 Dec 2019 14:40:02 -0800 Subject: [PATCH 11/67] bazel fix --- bazel/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bazel/BUILD b/bazel/BUILD index ce0025adb..15964fd1e 100644 --- a/bazel/BUILD +++ b/bazel/BUILD @@ -38,6 +38,7 @@ cc_library( "ngraph_bridge/ngraph_encapsulate_op.h", "ngraph_bridge/ngraph_data_cache.h", "ngraph_bridge/ngraph_find_replace_prefetchdataset.h", + "ngraph_bridge/ngraph_encapsulate_get_prefetch.h", "ngraph_bridge/ngraph_freshness_tracker.h", "ngraph_bridge/ngraph_mark_for_clustering.h", "ngraph_bridge/ngraph_partial_shapes.h", @@ -79,6 +80,7 @@ cc_library( "ngraph_bridge/ngraph_encapsulate_clusters.cc", "ngraph_bridge/ngraph_encapsulate_impl.cc", "ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc", + "ngraph_bridge/ngraph_encapsulate_get_prefetch.cc", "ngraph_bridge/ngraph_executor.cc", "ngraph_bridge/ngraph_encapsulate_op.cc", "ngraph_bridge/ngraph_freshness_tracker.cc", From 5233e693dd7930754fcbea5ba6e6f9e71c0d4eab Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 4 Dec 2019 14:42:21 -0800 Subject: [PATCH 12/67] Added test --- examples/axpy_pipelined_extended.py | 92 +++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 
100644 examples/axpy_pipelined_extended.py diff --git a/examples/axpy_pipelined_extended.py b/examples/axpy_pipelined_extended.py new file mode 100644 index 000000000..46c2b8baf --- /dev/null +++ b/examples/axpy_pipelined_extended.py @@ -0,0 +1,92 @@ +# ============================================================================== +# Copyright 2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) +import numpy as np + +import tensorflow as tf +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +import os +import ngraph_bridge + +import sys + + +def build_simple_model(input_array): + # Convert the numpy array to TF Tensor + input = tf.cast(input_array, tf.float32) + + # Define the Ops + mul = tf.compat.v1.math.multiply(input_array, 5) + add = tf.compat.v1.math.add(mul, 10) + output = add + return output + + +def build_data_pipeline(input_array, map_function, batch_size): + dataset = (tf.data.Dataset.from_tensor_slices( + (tf.constant(input_array) + )).map(map_function).batch(batch_size).prefetch(1)) + + iterator = dataset.make_initializable_iterator() + data_to_be_prefetched_and_used = iterator.get_next() + + return data_to_be_prefetched_and_used, iterator + + +def run_axpy_pipeline(): + input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] + expected_output_array = [-1, -1, 1, -1, -1, -1, -1, 
-1, -1] + output_array = [0, 0, 0, 0, 0, 0, 0, 0, 0] + multiplier = 10 + + for i in range(1, 10): + input_array[i - 1] = input_array[i - 1] * i * multiplier + map_function = lambda x: x * multiplier + batch_size = 1 + pipeline, iterator = build_data_pipeline(input_array, map_function, + batch_size) + model = build_simple_model(pipeline) + + with tf.Session() as sess: + # Initialize the globals and the dataset + sess.run(tf.global_variables_initializer()) + sess.run(iterator.initializer) + + for i in range(1, 10): + # Expected value is: + expected_output_array[i - 1] = ( + (input_array[i - 1] * multiplier) * 5) + 10 + + # Run one iteration + output = sess.run(model) + output_array[i - 1] = output[0] + return input_array, output_array, expected_output_array + + +def main(_): + input_array, output_array, expected_output_array = run_axpy_pipeline() + for i in range(1, 10): + print("Iteration:", i, " Input: ", input_array[i - 1], " Output: ", + output_array[i - 1], " Expected: ", expected_output_array[i - 1]) + sys.stdout.flush() + + +if __name__ == '__main__': + os.environ['NGRAPH_TF_BACKEND'] = "INTERPRETER" + #os.environ['NGRAPH_TF_USE_PREFETCH'] = "1" + tf.app.run(main=main) From 94cc4506288b157f7049eb0cbd855e1da15a2777 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 4 Dec 2019 16:09:44 -0800 Subject: [PATCH 13/67] Changed tests. 
Put pipelined io tensors together to avoid unnecessary complications --- examples/axpy_pipelined.py | 1 - examples/axpy_pipelined_extended.py | 34 +++++++++------- ngraph_bridge/ngraph_encapsulate_op.cc | 45 +++++++++++---------- ngraph_bridge/ngraph_prefetch_dataset_op.cc | 4 +- ngraph_bridge/ngraph_prefetch_shared_data.h | 36 +++++++++-------- 5 files changed, 65 insertions(+), 55 deletions(-) diff --git a/examples/axpy_pipelined.py b/examples/axpy_pipelined.py index 46c2b8baf..8bf3dc2ef 100644 --- a/examples/axpy_pipelined.py +++ b/examples/axpy_pipelined.py @@ -64,7 +64,6 @@ def run_axpy_pipeline(): with tf.Session() as sess: # Initialize the globals and the dataset - sess.run(tf.global_variables_initializer()) sess.run(iterator.initializer) for i in range(1, 10): diff --git a/examples/axpy_pipelined_extended.py b/examples/axpy_pipelined_extended.py index 46c2b8baf..51d3d95e8 100644 --- a/examples/axpy_pipelined_extended.py +++ b/examples/axpy_pipelined_extended.py @@ -18,6 +18,7 @@ import numpy as np import tensorflow as tf +from tensorflow.python.framework import dtypes tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) import os @@ -26,15 +27,18 @@ import sys -def build_simple_model(input_array): +def build_simple_model(input_array, c1, c2): # Convert the numpy array to TF Tensor - input = tf.cast(input_array, tf.float32) + input_f = tf.cast(input_array, tf.float32) # Define the Ops - mul = tf.compat.v1.math.multiply(input_array, 5) - add = tf.compat.v1.math.add(mul, 10) - output = add - return output + pl = tf.placeholder(dtype=dtypes.int32) + pl_f = tf.cast(pl, tf.float32) + mul = tf.compat.v1.math.multiply(input_f, c1) + add = tf.compat.v1.math.add(mul, c2) + add2 = add + pl_f + output = add2 + return output, pl def build_data_pipeline(input_array, map_function, batch_size): @@ -52,28 +56,30 @@ def run_axpy_pipeline(): input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] expected_output_array = [-1, -1, 1, -1, -1, -1, -1, -1, -1] output_array = [0, 0, 0, 
0, 0, 0, 0, 0, 0] - multiplier = 10 + map_multiplier = 10 - for i in range(1, 10): - input_array[i - 1] = input_array[i - 1] * i * multiplier - map_function = lambda x: x * multiplier + map_function = lambda x: x * map_multiplier batch_size = 1 pipeline, iterator = build_data_pipeline(input_array, map_function, batch_size) - model = build_simple_model(pipeline) + + # some constants + c1 = 5.0 + c2 = 10.0 + model, pl = build_simple_model(pipeline, c1, c2) with tf.Session() as sess: # Initialize the globals and the dataset - sess.run(tf.global_variables_initializer()) sess.run(iterator.initializer) for i in range(1, 10): # Expected value is: + # Change it to run on TF if the model gets too complex expected_output_array[i - 1] = ( - (input_array[i - 1] * multiplier) * 5) + 10 + (input_array[i - 1] * map_multiplier) * c1) + c2 + i # Run one iteration - output = sess.run(model) + output = sess.run(model, feed_dict={pl: i}) output_array[i - 1] = output[0] return input_array, output_array, expected_output_array diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 500443255..bb7f9d512 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -532,18 +532,19 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { io_tensors_next_iter = pipelined_tensor_store->get_tensors(); // Save the prefetched input ngTensors for the next iteration - NGraphPrefetchSharedResouce::InputTensorBundle next_input_tensor_bundle{ - get<0>(io_tensors_next_iter), get<1>(io_tensors_next_iter)}; + NGraphPrefetchSharedResouce::IOTensorBundle next_io_tensor_bundle{ + get<0>(io_tensors_next_iter), get<1>(io_tensors_next_iter), + get<2>(io_tensors_next_iter)}; OP_REQUIRES(ctx, - current_iter_pipeline_depth == (!next_input_tensor_bundle.Id), + current_iter_pipeline_depth == (!next_io_tensor_bundle.Id), errors::Internal("Current Pipeline Depth is ", current_iter_pipeline_depth, " and next iter 
pipeline depth is also ", - next_input_tensor_bundle.Id)); + next_io_tensor_bundle.Id)); - shared_data->AddNextInputTensorBundleForDeviceTransfer( - next_input_tensor_bundle); + shared_data->AddNextIOTensorBundleForDeviceTransfer( + next_io_tensor_bundle); ctx->SetStatus(ctx->resource_manager()->Create( NGraphPrefetchSharedResouce::CONTAINER_NAME, @@ -563,7 +564,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { cout << "skip_tf2ng_copy true " << endl; // We have been using the pipelined tensors - therefore do the // following: - // 1. Save the prefetched Input tensors for the current iteration + // 1. Save the prefetched Input/Output tensors for the current iteration // to the shared data object so that the prefetcher // can continue with copying the next set of inout tensor to the // device @@ -572,23 +573,23 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { // Add the current prefetched tensors for the next iteration // Get prefetched inputs - NGraphPrefetchSharedResouce::InputTensorBundle - prefetch_input_tensor_bundle{current_iter_pipeline_depth, - ng_inputs}; - shared_data->AddNextInputTensorBundleForDeviceTransfer( - prefetch_input_tensor_bundle); + NGraphPrefetchSharedResouce::IOTensorBundle prefetch_io_tensor_bundle{ + current_iter_pipeline_depth, ng_inputs, ng_outputs}; + shared_data->AddNextIOTensorBundleForDeviceTransfer( + prefetch_io_tensor_bundle); // Update the input_tensors with the one ready for exdcution - auto ng_input_tensor_bundle_ready = - shared_data->GetNextInputTensorBundleReadyForDeviceExecution(); - current_iter_pipeline_depth = ng_input_tensor_bundle_ready.Id; - ng_inputs = ng_input_tensor_bundle_ready.Inputs; - OP_REQUIRES(ctx, current_iter_pipeline_depth == - (!prefetch_input_tensor_bundle.Id), - errors::Internal("Current Pipeline Depth is ", - current_iter_pipeline_depth, - " and next iter pipeline depth is ", - "also ", prefetch_input_tensor_bundle.Id)); + auto 
ng_io_tensor_bundle_ready = + shared_data->GetNextIOTensorBundleReadyForDeviceExecution(); + current_iter_pipeline_depth = ng_io_tensor_bundle_ready.Id; + ng_inputs = ng_io_tensor_bundle_ready.Inputs; + ng_outputs = ng_io_tensor_bundle_ready.Outputs; + OP_REQUIRES( + ctx, current_iter_pipeline_depth == (!prefetch_io_tensor_bundle.Id), + errors::Internal("Current Pipeline Depth is ", + current_iter_pipeline_depth, + " and next iter pipeline depth is ", "also ", + prefetch_io_tensor_bundle.Id)); skip_tf2ng_copy = true; NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Using device tensors"; } diff --git a/ngraph_bridge/ngraph_prefetch_dataset_op.cc b/ngraph_bridge/ngraph_prefetch_dataset_op.cc index 18b444611..e7c5e737a 100644 --- a/ngraph_bridge/ngraph_prefetch_dataset_op.cc +++ b/ngraph_bridge/ngraph_prefetch_dataset_op.cc @@ -419,7 +419,7 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { shared_data->SetBufferDepth(m_buffer_size); auto ng_input_tensor_bundle = - shared_data->GetNextInputTensorBundleForDeviceTransfer(); + shared_data->GetNextIOTensorBundleForDeviceTransfer(); auto ng_prefetch_input_indexes = shared_data->GetPrefetchInputIndexes(); @@ -459,7 +459,7 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { } // Now add them back to the other queue - shared_data->AddNextInputTensorBundleReadyForDeviceExecution( + shared_data->AddNextIOTensorBundleReadyForDeviceExecution( ng_input_tensor_bundle); shared_data->Unref(); evt_dev_cp.Stop(); diff --git a/ngraph_bridge/ngraph_prefetch_shared_data.h b/ngraph_bridge/ngraph_prefetch_shared_data.h index 1fe9054c5..066ce54a3 100644 --- a/ngraph_bridge/ngraph_prefetch_shared_data.h +++ b/ngraph_bridge/ngraph_prefetch_shared_data.h @@ -63,33 +63,36 @@ class NGraphPrefetchSharedResouce : public ResourceBase { static constexpr const char* NGRAPH_TF_USE_PREFETCH = "NGRAPH_TF_USE_PREFETCH"; - struct InputTensorBundle { + struct IOTensorBundle { int Id; std::vector> Inputs; + std::vector> Outputs; }; - // 
Adds the given nGraph input tensors to write to + // Adds the given nGraph input output tensors to write to + // Uses m_prefetch_input_indexes to figure out which input tensors + // are prefetched and writes into them // This is called by the NGraphEncapOp - void AddNextInputTensorBundleForDeviceTransfer(InputTensorBundle next) { + void AddNextIOTensorBundleForDeviceTransfer(IOTensorBundle next) { m_tf_2_ng.Add(std::move(next)); } - // Returns the Input tensors to be used to copy TF tensors to NG device + // Returns the Input output tensors to be used to copy TF tensors to NG device // This will be called by the prefetcher - InputTensorBundle GetNextInputTensorBundleForDeviceTransfer() { + IOTensorBundle GetNextIOTensorBundleForDeviceTransfer() { return std::move(m_tf_2_ng.GetNextAvailable()); } - // Adds the given nGraph input tensors to write to + // Adds the given nGraph input output tensors to write to // This is called by the prefetcher to add Tensors that are copied // from TF tensor and are now ready for the next iteration - void AddNextInputTensorBundleReadyForDeviceExecution(InputTensorBundle next) { + void AddNextIOTensorBundleReadyForDeviceExecution(IOTensorBundle next) { m_ng_2_tf.Add(std::move(next)); } - // Returns the Input tensors to be ready to be executed by NG device + // Returns the Input output tensors to be ready to be executed by NG device // This will be called by the NGEncOp - InputTensorBundle GetNextInputTensorBundleReadyForDeviceExecution() { + IOTensorBundle GetNextIOTensorBundleReadyForDeviceExecution() { return std::move(m_ng_2_tf.GetNextAvailable()); } @@ -138,19 +141,20 @@ class NGraphPrefetchSharedResouce : public ResourceBase { // // The interaction is as follows: // Iteration Action - // 1 NGEncOp pushes the Input tensors to m_ng_2_tf queue + // 1 NGEncOp pushes the Input/Output tensors to m_ng_2_tf queue // 2 - // Prefetcher pulls Input tensors out of m_ng_2_tf queue and copies + // Prefetcher pulls Input/Output tensors out of 
m_ng_2_tf queue and + // copies // TF - // data + // data to the prefetched inputs // Prefetcher pushes this item to the m_tf_2_ng queue - // NGEncOp pushes the Input tensors to m_ng_2_tf queue - // NGEncOp pulls Input tensors from m_tf_2_ng (from previous + // NGEncOp pushes the Input/Output tensors to m_ng_2_tf queue + // NGEncOp pulls Input/Output tensors from m_tf_2_ng (from previous // iteration) and executes // 3 Repeat - ThreadSafeQueue m_tf_2_ng; - ThreadSafeQueue m_ng_2_tf; + ThreadSafeQueue m_tf_2_ng; + ThreadSafeQueue m_ng_2_tf; int m_prefetch_buffer_depth{-1}; int m_skip_count{0}; From 8bc22bd9045b8f15bed23cc04dbc5120b7278ee7 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 4 Dec 2019 17:37:17 -0800 Subject: [PATCH 14/67] refactored --- .../ngraph_encapsulate_get_prefetch.cc | 203 +++++++++++++++++- .../ngraph_encapsulate_get_prefetch.h | 13 +- ngraph_bridge/ngraph_encapsulate_op.cc | 147 +------------ .../ngraph_enter_prefetch_in_catalog.cc | 7 + 4 files changed, 228 insertions(+), 142 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc index b3c70c227..9a89143b7 100644 --- a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc @@ -15,6 +15,7 @@ *******************************************************************************/ #include "ngraph_bridge/ngraph_encapsulate_get_prefetch.h" +#include "ngraph_bridge/ngraph_prefetch_shared_data.h" using namespace std; @@ -22,7 +23,207 @@ namespace tensorflow { namespace ngraph_bridge { -Status GetPrefetchTensors() { return Status::OK(); } +Status GetPipelinedIOTensorsReadyForExecution( + OpKernelContext* ctx, const std::vector& tf_input_tensors, + const shared_ptr& pipelined_tensor_store, + const shared_ptr& tensor_manager, + std::tuple& + pipelined_io_tensors) { + auto io_tensors = pipelined_tensor_store->get_tensors(); + + int current_iter_pipeline_depth = get<0>(io_tensors); + 
PipelinedTensorVector ng_pipelined_inputs = get<1>(io_tensors); + PipelinedTensorVector ng_pipelined_outputs = get<2>(io_tensors); + auto pipelined_input_indexes = tensor_manager->GetPipelinedInputIndexes(); + auto pipelined_output_indexes = tensor_manager->GetPipelinedInputIndexes(); + + if (current_iter_pipeline_depth < 0) { + return errors::Internal("No free tensor available")); + } + + if (pipelined_input_indexes.size() != ng_pipelined_inputs.size()) { + return errors::Internal("Pipelined input tensors size ", ng_pipelined_inputs.size(), " does not match the no. of pipelined inputs indexes ", pipelined_input_indexes.size())); + } + + if (pipelined_output_indexes.size() != ng_pipelined_outputs.size()) { + return errors::Internal("Pipelined output tensors size ", ng_pipelined_outputs.size(), " does not match the no. of pipelined output indexes ", pipelined_output_indexes.size())); + } + + bool skip_tf2ng_copy = false; + if (std::getenv(NGraphPrefetchSharedResouce::NGRAPH_TF_USE_PREFETCH) != + nullptr) { + cout << "using prefetch env flag " << endl; + // Set the prefetch shared obj if applicable + NGraphPrefetchSharedResouce* shared_data = nullptr; + Status s = ctx->resource_manager()->Lookup( + NGraphPrefetchSharedResouce::CONTAINER_NAME, + NGraphPrefetchSharedResouce::RESOURCE_NAME, &shared_data); + + if (!s.ok()) { + // We are using this for the first time i.e., we need to do the following + // 1. Create the shared data object + // 2. We get another pipelined tensor pair for the current iteration and + // add it to the shared data. 
It will be accessed by prefetcher to copy + // the + // prefetched inputs to device + auto ng_prefetch_input_indexes = + tensor_manager->GetPipelinedInputIndexesThatArePrefetched(); + cout << "ng_prefetch_input_indexes " << ng_prefetch_input_indexes.size() + << endl; + + for (auto inp : ng_prefetch_input_indexes) { + cout << " inp indez " << inp << endl; + } + shared_data = new NGraphPrefetchSharedResouce( + name(), m_parallel_executor->GetOpBackendName(), + m_parallel_executor->GetGraphId(), + m_parallel_executor->GetNgraphClusterId(), ng_prefetch_input_indexes); + + // Get the set of IO tensors for the next iteration + std::tuple + io_tensors_next_iter; + io_tensors_next_iter = pipelined_tensor_store->get_tensors(); + + // Save the prefetched input ngTensors for the next iteration + NGraphPrefetchSharedResouce::IOTensorBundle next_io_tensor_bundle{ + get<0>(io_tensors_next_iter), get<1>(io_tensors_next_iter), + get<2>(io_tensors_next_iter)}; + + if (current_iter_pipeline_depth != (!next_io_tensor_bundle.Id)) { + return errors::Internal("Current Pipeline Depth is ", + current_iter_pipeline_depth, + " and next iter pipeline depth is also ", + next_io_tensor_bundle.Id); + } + + shared_data->AddNextIOTensorBundleForDeviceTransfer( + next_io_tensor_bundle); + + ctx->SetStatus(ctx->resource_manager()->Create( + NGraphPrefetchSharedResouce::CONTAINER_NAME, + NGraphPrefetchSharedResouce::RESOURCE_NAME, shared_data)); + // Continue the execution with the currently supplied TF tensor for the + // last time + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " + "signal prefetching"; + } else { + cout << "using prefetch inputs " << endl; + + int prefetch_buffer_depth = shared_data->GetBufferDepth(); + int skip_count = shared_data->GetSkipCount(); + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth + << " skip count; " << skip_count; + if (skip_count >= prefetch_buffer_depth) { + cout << "skip_tf2ng_copy true " << endl; + // We have 
been using the pipelined tensors - therefore do the + // following: + // 1. Save the prefetched Input/Output tensors for the current iteration + // to the shared data object so that the prefetcher + // can continue with copying the next set of inout tensor to the + // device + // 3. Execute the nGraph call for this iteration using the + // nG prefeteched input tensors we got from the shared data + + // Add the current prefetched tensors for the next iteration + // Get prefetched inputs + NGraphPrefetchSharedResouce::IOTensorBundle prefetch_io_tensor_bundle{ + current_iter_pipeline_depth, ng_pipelined_inputs, + ng_pipelined_outputs}; + shared_data->AddNextIOTensorBundleForDeviceTransfer( + prefetch_io_tensor_bundle); + + // Update the input_tensors with the one ready for exdcution + auto ng_io_tensor_bundle_ready = + shared_data->GetNextIOTensorBundleReadyForDeviceExecution(); + current_iter_pipeline_depth = ng_io_tensor_bundle_ready.Id; + ng_pipelined_inputs = ng_io_tensor_bundle_ready.Inputs; + ng_pipelined_outputs = ng_io_tensor_bundle_ready.Outputs; + if (current_iter_pipeline_depth != (!prefetch_io_tensor_bundle.Id)) { + return errors::Internal("Current Pipeline Depth is ", + current_iter_pipeline_depth, + " and next iter pipeline depth is ", "also ", + prefetch_io_tensor_bundle.Id); + } + skip_tf2ng_copy = true; + NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Using device tensors"; + } + shared_data->IncrSkipCount(); + } + } + + // Allocate the input/ + ngraph::Event event_copy_input_tensor("Copy Pipelined Input Tensors", "", ""); + + if (!skip_tf2ng_copy) { + // All pipelined inputs are copied + + for (auto i = 0; i < pipelined_input_indexes.size(); i++) { + cout << "copying inputs true " << endl; + int index = pipelined_input_indexes[i]; + ng::element::Type ng_element_type; + OP_REQUIRES_OK( + ctx, TFDataTypeToNGraphElementType(tf_input_tensors[index].dtype(), + &ng_element_type)); + + void* current_src_ptr = (void*)DMAHelper::base(&tf_input_tensors[index]); + try 
{ + ng_pipelined_inputs[i]->write( + current_src_ptr, ng_pipelined_inputs[i]->get_element_count() * + ng_element_type.size()); + } catch (const std::exception& exp) { + OP_REQUIRES( + ctx, false, + errors::Internal("Error copying TF tensor to device tensor: ", + exp.what())); + } catch (...) { + OP_REQUIRES( + ctx, false, + errors::Internal("Error copying TF tensor to device tensor")); + } + } + } else { + // All pipelined inputs that are not prefetched are copied + // Note skip_tf2ng_copy will be true only when PREFETCH is enabled via env + // flag + + // Gives the TF input index + auto pipelined_input_indexes_not_prefetched = + tensor_manager->GetPipelinedInputIndexes(); + + // Gives the mapping for corresponding + for (auto i = 0; i < pipelined_input_indexes_not_prefetched.size(); i++) { + cout << "copying inputs true " << endl; + int index = pipelined_input_indexes_not_prefetched[i]; + ng::element::Type ng_element_type; + OP_REQUIRES_OK( + ctx, TFDataTypeToNGraphElementType(tf_input_tensors[index].dtype(), + &ng_element_type)); + + void* current_src_ptr = (void*)DMAHelper::base(&tf_input_tensors[index]); + try { + ng_pipelined_inputs[index]->write( + current_src_ptr, ng_pipelined_inputs[index]->get_element_count() * + ng_element_type.size()); + } catch (const std::exception& exp) { + OP_REQUIRES( + ctx, false, + errors::Internal("Error copying TF tensor to device tensor: ", + exp.what())); + } catch (...) 
{ + OP_REQUIRES( + ctx, false, + errors::Internal("Error copying TF tensor to device tensor")); + } + } + } + event_copy_input_tensor.Stop(); + ngraph::Event::write_trace(event_copy_input_tensor); + + pipelined_io_tensors = make_tuple(current_iter_pipeline_depth, + ng_pipelined_inputs, ng_pipelined_outputs); + + return Status::OK(); +} } // namespace ngraph_bridge } // namespace tensorflow diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.h b/ngraph_bridge/ngraph_encapsulate_get_prefetch.h index cc09f3acf..0774674f1 100644 --- a/ngraph_bridge/ngraph_encapsulate_get_prefetch.h +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.h @@ -14,8 +14,8 @@ * limitations under the License. *******************************************************************************/ -#ifndef NGRAPH_TF_BRIDGE_GET_PREFETCH_H -#define NGRAPH_TF_BRIDGE_GET_PREFETCH_H +#ifndef NGRAPH_TF_BRIDGE_GET_PIPELINED_TENSORS_H +#define NGRAPH_TF_BRIDGE_GET_PIPELINED_TENSORS_H #pragma once @@ -25,9 +25,14 @@ namespace tensorflow { namespace ngraph_bridge { -Status GetPrefetchTensors(); +Status GetPipelinedIOTensorsReadyForExecution( + OpKernelContext* ctx, const std::vector& tf_input_tensors, + const shared_ptr& pipelined_tensor_store, + const shared_ptr& tensor_manager, + std::tuple& + pipelined_io_tensors); } // namespace ngraph_bridge } // namespace tensorflow -#endif // NGRAPH_TF_BRIDGE_GET_PREFETCH_H +#endif // NGRAPH_TF_BRIDGE_GET_PIPELINED_TENSORS_H diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index bb7f9d512..99aba41f0 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -483,148 +483,21 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { num_of_outputs, "and number of exec outputs ", ng_exec->get_results().size(), " do not match")); - // create inputs, outputs, pipelineId - std::tuple io_tensors; - io_tensors = pipelined_tensor_store->get_tensors(); - 
OP_REQUIRES(ctx, !(std::get<0>(io_tensors) < 0), - errors::Internal("No free tensor available")); - int current_iter_pipeline_depth = get<0>(io_tensors); + // Get pipelined input output tensors for this iteration + std::tuple + pipelined_io_tensors; + OP_REQUIRES_OK(ctx, GetPipelinedIOTensorsReadyForExecution( + ctx, tf_input_tensors, pipelined_tensor_store, + pipelined_io_tensors)); + + int current_iter_pipeline_depth = get<0>(pipelined_io_tensors); vector> ng_inputs(num_of_inputs); vector> ng_outputs(num_of_outputs); // Assume All inputs and outputs are pipelined // TODO: Fit in variables - ng_inputs = get<1>(io_tensors); - ng_outputs = get<2>(io_tensors); - - bool skip_tf2ng_copy = false; - if (std::getenv(NGraphPrefetchSharedResouce::NGRAPH_TF_USE_PREFETCH) != - nullptr) { - cout << "using prefetch env flag " << endl; - // Set the prefetch shared obj if applicable - NGraphPrefetchSharedResouce* shared_data = nullptr; - Status s = ctx->resource_manager()->Lookup( - NGraphPrefetchSharedResouce::CONTAINER_NAME, - NGraphPrefetchSharedResouce::RESOURCE_NAME, &shared_data); - - if (!s.ok()) { - // We are using this for the first time i.e., we need to do the following - // 1. Create the shared data object - // 2. We get another pipelined tensor pair for the current iteration and - // copy the TF tensor to this set and continue with the execution for - // for this iteration. 
- auto ng_prefetch_input_indexes = - tensor_manager->GetPipelinedInputIndexesThatArePrefetched(); - cout << "ng_prefetch_input_indexes " << ng_prefetch_input_indexes.size() - << endl; - - for (auto inp : ng_prefetch_input_indexes) { - cout << " inp indez " << inp << endl; - } - shared_data = new NGraphPrefetchSharedResouce( - name(), m_parallel_executor->GetOpBackendName(), - m_parallel_executor->GetGraphId(), - m_parallel_executor->GetNgraphClusterId(), ng_prefetch_input_indexes); - - // Get the set of IO tensors for the next iteration - std::tuple - io_tensors_next_iter; - io_tensors_next_iter = pipelined_tensor_store->get_tensors(); - - // Save the prefetched input ngTensors for the next iteration - NGraphPrefetchSharedResouce::IOTensorBundle next_io_tensor_bundle{ - get<0>(io_tensors_next_iter), get<1>(io_tensors_next_iter), - get<2>(io_tensors_next_iter)}; - - OP_REQUIRES(ctx, - current_iter_pipeline_depth == (!next_io_tensor_bundle.Id), - errors::Internal("Current Pipeline Depth is ", - current_iter_pipeline_depth, - " and next iter pipeline depth is also ", - next_io_tensor_bundle.Id)); - - shared_data->AddNextIOTensorBundleForDeviceTransfer( - next_io_tensor_bundle); - - ctx->SetStatus(ctx->resource_manager()->Create( - NGraphPrefetchSharedResouce::CONTAINER_NAME, - NGraphPrefetchSharedResouce::RESOURCE_NAME, shared_data)); - // Continue the execution with the currently supplied TF tensor for the - // last time - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " - "signal prefetching"; - } else { - cout << "using prefetch inputs " << endl; - - int prefetch_buffer_depth = shared_data->GetBufferDepth(); - int skip_count = shared_data->GetSkipCount(); - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth - << " skip count; " << skip_count; - if (skip_count >= prefetch_buffer_depth) { - cout << "skip_tf2ng_copy true " << endl; - // We have been using the pipelined tensors - therefore do the - // following: - // 1. 
Save the prefetched Input/Output tensors for the current iteration - // to the shared data object so that the prefetcher - // can continue with copying the next set of inout tensor to the - // device - // 3. Execute the nGraph call for this iteration using the - // nG prefeteched input tensors we got from the shared data - - // Add the current prefetched tensors for the next iteration - // Get prefetched inputs - NGraphPrefetchSharedResouce::IOTensorBundle prefetch_io_tensor_bundle{ - current_iter_pipeline_depth, ng_inputs, ng_outputs}; - shared_data->AddNextIOTensorBundleForDeviceTransfer( - prefetch_io_tensor_bundle); - - // Update the input_tensors with the one ready for exdcution - auto ng_io_tensor_bundle_ready = - shared_data->GetNextIOTensorBundleReadyForDeviceExecution(); - current_iter_pipeline_depth = ng_io_tensor_bundle_ready.Id; - ng_inputs = ng_io_tensor_bundle_ready.Inputs; - ng_outputs = ng_io_tensor_bundle_ready.Outputs; - OP_REQUIRES( - ctx, current_iter_pipeline_depth == (!prefetch_io_tensor_bundle.Id), - errors::Internal("Current Pipeline Depth is ", - current_iter_pipeline_depth, - " and next iter pipeline depth is ", "also ", - prefetch_io_tensor_bundle.Id)); - skip_tf2ng_copy = true; - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Using device tensors"; - } - shared_data->IncrSkipCount(); - } - } - - // Allocate the input/ - ngraph::Event event_copy_input_tensor("Copy Input Tensor", "", ""); - - if (!skip_tf2ng_copy) { - for (auto i = 0; i < tf_input_tensors.size(); i++) { - cout << "copying inputs true " << endl; - ng::element::Type ng_element_type; - OP_REQUIRES_OK(ctx, TFDataTypeToNGraphElementType( - tf_input_tensors[i].dtype(), &ng_element_type)); - - void* current_src_ptr = (void*)DMAHelper::base(&tf_input_tensors[i]); - try { - ng_inputs[i]->write(current_src_ptr, ng_inputs[i]->get_element_count() * - ng_element_type.size()); - } catch (const std::exception& exp) { - OP_REQUIRES( - ctx, false, - errors::Internal("Error copying TF tensor to 
device tensor: ", - exp.what())); - } catch (...) { - OP_REQUIRES( - ctx, false, - errors::Internal("Error copying TF tensor to device tensor")); - } - } - } - event_copy_input_tensor.Stop(); - ngraph::Event::write_trace(event_copy_input_tensor); + ng_inputs = get<1>(pipelined_io_tensors); + ng_outputs = get<2>(pipelined_io_tensors); // And execute ngraph::Event event_execute_graph("Execute Graph", "", ""); diff --git a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc index 84513087f..8ff3bbd6e 100644 --- a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc +++ b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc @@ -23,6 +23,7 @@ #include "logging/ngraph_log.h" #include "ngraph_bridge/ngraph_catalog.h" #include "ngraph_bridge/ngraph_enter_prefetch_in_catalog.h" +#include "ngraph_bridge/ngraph_prefetch_shared_data.h" #include "ngraph_bridge/ngraph_utils.h" using namespace std; @@ -44,6 +45,12 @@ namespace ngraph_bridge { // Status EnterPrefetchInCatalog(Graph* graph, int graph_id) { + if (std::getenv(NGraphPrefetchSharedResouce::NGRAPH_TF_USE_PREFETCH) == + nullptr) { + // if prefetch is not requested return + return Status::OK(); + } + // Go over all the nodes in the graph for (auto node : graph->op_nodes()) { // If the node is a NGraphEncapsulate, go over all it's From 33d4431399a53fbc9f4f008a5dacaa0721031112 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 4 Dec 2019 17:56:53 -0800 Subject: [PATCH 15/67] Indexes utilities --- ngraph_bridge/ngraph_encapsulate_get_prefetch.cc | 7 +++++-- ngraph_bridge/ngraph_tensor_manager.h | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc index 9a89143b7..f121d5b45 100644 --- a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc @@ -188,11 +188,14 @@ Status 
GetPipelinedIOTensorsReadyForExecution( // Gives the TF input index auto pipelined_input_indexes_not_prefetched = - tensor_manager->GetPipelinedInputIndexes(); + tensor_manager->GetPipelinedInputIndexesNotPrefetched(); + + auto pipelined_input_indexes_not_prefetched = + tensor_manager->GetPipelinedInputIndexesNotPrefetched(); // Gives the mapping for corresponding for (auto i = 0; i < pipelined_input_indexes_not_prefetched.size(); i++) { - cout << "copying inputs true " << endl; + cout << "copying some inputs true " << endl; int index = pipelined_input_indexes_not_prefetched[i]; ng::element::Type ng_element_type; OP_REQUIRES_OK( diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index 09060dd32..b22800d16 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -66,14 +66,26 @@ class NGraphTensorManager { return m_pipelined_output_indexes; } + // wrt to all inputs const vector& GetPrefetchedInputIndexes() { return m_prefetched_input_indexes; } + // wrt to all inputs + const vector& GetPipelinedButNotPrefetchedInputIndexes() { + return m_pipelined_not_prefetched_input_indexes; + } + + // wrt to pipelined inputs const vector& GetPipelinedInputIndexesThatArePrefetched() { return m_pipelined_input_indexes_prefetched; } + // wrt to pipelined inputs + const vector& GetPipelinedInputIndexesThatAreNotPrefetched() { + return m_pipelined_input_indexes_not_prefetched; + } + vector> GetPrefetchedTensors( const vector>& pipelined_input_tensors); @@ -94,9 +106,11 @@ class NGraphTensorManager { vector m_pipelined_input_indexes; vector m_pipelined_output_indexes; vector m_pipelined_input_indexes_prefetched; + vector m_pipelined_input_indexes_not_prefetched; //[TODO] Book-keeping for prefetched inputs vector m_prefetched_input_indexes; + vector m_pipelined_not_prefetched_input_indexes; }; } // namespace ngraph_bridge From 1c1aaffcc91bc5769440c40aa2aa09c73660e3c9 Mon Sep 17 00:00:00 2001 From: Shrestha 
Malik Date: Thu, 5 Dec 2019 17:54:40 -0800 Subject: [PATCH 16/67] Fix test --- .../ngraph_encapsulate_get_prefetch.cc | 88 ++++++++++--------- .../ngraph_encapsulate_get_prefetch.h | 13 ++- ngraph_bridge/ngraph_encapsulate_op.cc | 2 +- ngraph_bridge/ngraph_prefetch_shared_data.h | 8 +- ngraph_bridge/ngraph_tensor_manager.cc | 10 +++ ngraph_bridge/ngraph_tensor_manager.h | 14 ++- ngraph_bridge/ngraph_utils.cc | 15 ++++ ngraph_bridge/ngraph_utils.h | 7 ++ 8 files changed, 103 insertions(+), 54 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc index f121d5b45..a8faeecfe 100644 --- a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc @@ -16,6 +16,7 @@ #include "ngraph_bridge/ngraph_encapsulate_get_prefetch.h" #include "ngraph_bridge/ngraph_prefetch_shared_data.h" +#include "ngraph_bridge/ngraph_utils.h" using namespace std; @@ -24,9 +25,9 @@ namespace tensorflow { namespace ngraph_bridge { Status GetPipelinedIOTensorsReadyForExecution( - OpKernelContext* ctx, const std::vector& tf_input_tensors, - const shared_ptr& pipelined_tensor_store, - const shared_ptr& tensor_manager, + OpKernelContext* ctx, std::vector& tf_input_tensors, + shared_ptr& pipelined_tensor_store, + shared_ptr& tensor_manager, std::tuple& pipelined_io_tensors) { auto io_tensors = pipelined_tensor_store->get_tensors(); @@ -35,18 +36,24 @@ Status GetPipelinedIOTensorsReadyForExecution( PipelinedTensorVector ng_pipelined_inputs = get<1>(io_tensors); PipelinedTensorVector ng_pipelined_outputs = get<2>(io_tensors); auto pipelined_input_indexes = tensor_manager->GetPipelinedInputIndexes(); - auto pipelined_output_indexes = tensor_manager->GetPipelinedInputIndexes(); + auto pipelined_output_indexes = tensor_manager->GetPipelinedOutputIndexes(); if (current_iter_pipeline_depth < 0) { - return errors::Internal("No free tensor available")); + return errors::Internal("No free tensor 
available"); } if (pipelined_input_indexes.size() != ng_pipelined_inputs.size()) { - return errors::Internal("Pipelined input tensors size ", ng_pipelined_inputs.size(), " does not match the no. of pipelined inputs indexes ", pipelined_input_indexes.size())); + return errors::Internal( + "Pipelined input tensors size ", ng_pipelined_inputs.size(), + " does not match the no. of pipelined inputs indexes ", + pipelined_input_indexes.size()); } if (pipelined_output_indexes.size() != ng_pipelined_outputs.size()) { - return errors::Internal("Pipelined output tensors size ", ng_pipelined_outputs.size(), " does not match the no. of pipelined output indexes ", pipelined_output_indexes.size())); + return errors::Internal( + "Pipelined output tensors size ", ng_pipelined_outputs.size(), + " does not match the no. of pipelined output indexes ", + pipelined_output_indexes.size()); } bool skip_tf2ng_copy = false; @@ -75,9 +82,8 @@ Status GetPipelinedIOTensorsReadyForExecution( cout << " inp indez " << inp << endl; } shared_data = new NGraphPrefetchSharedResouce( - name(), m_parallel_executor->GetOpBackendName(), - m_parallel_executor->GetGraphId(), - m_parallel_executor->GetNgraphClusterId(), ng_prefetch_input_indexes); + tensor_manager->GetName(), tensor_manager->GetGraphId(), + tensor_manager->GetClusterId(), ng_prefetch_input_indexes); // Get the set of IO tensors for the next iteration std::tuple @@ -159,26 +165,23 @@ Status GetPipelinedIOTensorsReadyForExecution( for (auto i = 0; i < pipelined_input_indexes.size(); i++) { cout << "copying inputs true " << endl; - int index = pipelined_input_indexes[i]; - ng::element::Type ng_element_type; - OP_REQUIRES_OK( - ctx, TFDataTypeToNGraphElementType(tf_input_tensors[index].dtype(), - &ng_element_type)); + int tf_index = pipelined_input_indexes[i]; + cout << "tf index " << tf_index << "ng index " << i << endl; - void* current_src_ptr = (void*)DMAHelper::base(&tf_input_tensors[index]); + ng::element::Type ng_element_type; + 
TF_RETURN_IF_ERROR(TFDataTypeToNGraphElementType( + tf_input_tensors[tf_index].dtype(), &ng_element_type)); + void* current_src_ptr = + (void*)DMAHelper::base(&tf_input_tensors[tf_index]); try { ng_pipelined_inputs[i]->write( current_src_ptr, ng_pipelined_inputs[i]->get_element_count() * ng_element_type.size()); } catch (const std::exception& exp) { - OP_REQUIRES( - ctx, false, - errors::Internal("Error copying TF tensor to device tensor: ", - exp.what())); + return errors::Internal("Error copying TF tensor to device tensor: ", + exp.what()); } catch (...) { - OP_REQUIRES( - ctx, false, - errors::Internal("Error copying TF tensor to device tensor")); + return errors::Internal("Error copying TF tensor to device tensor"); } } } else { @@ -186,36 +189,35 @@ Status GetPipelinedIOTensorsReadyForExecution( // Note skip_tf2ng_copy will be true only when PREFETCH is enabled via env // flag - // Gives the TF input index - auto pipelined_input_indexes_not_prefetched = - tensor_manager->GetPipelinedInputIndexesNotPrefetched(); + // Gives the TF input index : wrt to all inputs + auto pipelined_not_prefetched_input_indexes = + tensor_manager->GetPipelinedButNotPrefetchedInputIndexes(); + // Gives the corresponding pipelined input index : wrt pipelined auto pipelined_input_indexes_not_prefetched = - tensor_manager->GetPipelinedInputIndexesNotPrefetched(); + tensor_manager->GetPipelinedInputIndexesThatAreNotPrefetched(); // Gives the mapping for corresponding for (auto i = 0; i < pipelined_input_indexes_not_prefetched.size(); i++) { cout << "copying some inputs true " << endl; - int index = pipelined_input_indexes_not_prefetched[i]; + int tf_index = pipelined_not_prefetched_input_indexes[i]; + int ng_index = pipelined_input_indexes_not_prefetched[i]; ng::element::Type ng_element_type; - OP_REQUIRES_OK( - ctx, TFDataTypeToNGraphElementType(tf_input_tensors[index].dtype(), - &ng_element_type)); - - void* current_src_ptr = (void*)DMAHelper::base(&tf_input_tensors[index]); + cout << 
"tf index " << tf_index << " ng_index " << ng_index << endl; + TF_RETURN_IF_ERROR(TFDataTypeToNGraphElementType( + tf_input_tensors[tf_index].dtype(), &ng_element_type)); + void* current_src_ptr = + (void*)DMAHelper::base(&tf_input_tensors[tf_index]); try { - ng_pipelined_inputs[index]->write( - current_src_ptr, ng_pipelined_inputs[index]->get_element_count() * - ng_element_type.size()); + ng_pipelined_inputs[ng_index]->write( + current_src_ptr, + ng_pipelined_inputs[ng_index]->get_element_count() * + ng_element_type.size()); } catch (const std::exception& exp) { - OP_REQUIRES( - ctx, false, - errors::Internal("Error copying TF tensor to device tensor: ", - exp.what())); + return errors::Internal("Error copying TF tensor to device tensor: ", + exp.what()); } catch (...) { - OP_REQUIRES( - ctx, false, - errors::Internal("Error copying TF tensor to device tensor")); + return errors::Internal("Error copying TF tensor to device tensor"); } } } diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.h b/ngraph_bridge/ngraph_encapsulate_get_prefetch.h index 0774674f1..33d1476d2 100644 --- a/ngraph_bridge/ngraph_encapsulate_get_prefetch.h +++ b/ngraph_bridge/ngraph_encapsulate_get_prefetch.h @@ -21,15 +21,20 @@ #include "tensorflow/core/graph/graph.h" +#include "logging/ngraph_log.h" +#include "ngraph_bridge/ngraph_pipelined_tensors.h" +#include "ngraph_bridge/ngraph_tensor_manager.h" + +using namespace std; namespace tensorflow { namespace ngraph_bridge { Status GetPipelinedIOTensorsReadyForExecution( - OpKernelContext* ctx, const std::vector& tf_input_tensors, - const shared_ptr& pipelined_tensor_store, - const shared_ptr& tensor_manager, - std::tuple& + OpKernelContext* ctx, vector& tf_input_tensors, + shared_ptr& pipelined_tensor_store, + shared_ptr& tensor_manager, + tuple& pipelined_io_tensors); } // namespace ngraph_bridge diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 99aba41f0..8b62e4d78 100644 --- 
a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -488,7 +488,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { pipelined_io_tensors; OP_REQUIRES_OK(ctx, GetPipelinedIOTensorsReadyForExecution( ctx, tf_input_tensors, pipelined_tensor_store, - pipelined_io_tensors)); + tensor_manager, pipelined_io_tensors)); int current_iter_pipeline_depth = get<0>(pipelined_io_tensors); vector> ng_inputs(num_of_inputs); diff --git a/ngraph_bridge/ngraph_prefetch_shared_data.h b/ngraph_bridge/ngraph_prefetch_shared_data.h index 066ce54a3..8ce683efd 100644 --- a/ngraph_bridge/ngraph_prefetch_shared_data.h +++ b/ngraph_bridge/ngraph_prefetch_shared_data.h @@ -39,11 +39,11 @@ namespace ngraph_bridge { class NGraphPrefetchSharedResouce : public ResourceBase { public: explicit NGraphPrefetchSharedResouce(const std::string& ng_enc_op_name, - const std::string& backend_name, + // const std::string& backend_name, int cluster_id, int graph_id, const vector prefetch_input_indexes) : m_ng_enc_op_name(ng_enc_op_name), - m_backend_name(backend_name), + // m_backend_name(backend_name), m_graph_id(graph_id), m_cluster_id(cluster_id), m_prefetch_input_indexes(prefetch_input_indexes) {} @@ -54,7 +54,7 @@ class NGraphPrefetchSharedResouce : public ResourceBase { // Returns memory used by this resource. 
int64 MemoryUsed() const override { return 0; } std::string GetName() const { return m_ng_enc_op_name; } - std::string GetBackendName() const { return m_backend_name; } + // std::string GetBackendName() const { return m_backend_name; } int GetGraphId() const { return m_graph_id; } int GetClusterId() const { return m_cluster_id; } @@ -126,7 +126,7 @@ class NGraphPrefetchSharedResouce : public ResourceBase { private: const std::string m_ng_enc_op_name; - const std::string m_backend_name; + // const std::string m_backend_name; const int m_graph_id; const int m_cluster_id; const vector m_prefetch_input_indexes; diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 9576a7732..d8b0d6ffb 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -58,6 +58,10 @@ void NGraphTensorManager::Initialize() { m_output_indexes_that_need_copy.push_back(index); } } +#else + m_output_indexes_that_need_copy.resize(m_number_of_outputs); + iota(begin(m_output_indexes_that_need_copy), + end(m_output_indexes_that_need_copy), 0); #endif m_pipelined_input_indexes = FindComplement(m_number_of_inputs, m_input_indexes_from_variables); @@ -96,6 +100,12 @@ void NGraphTensorManager::Initialize() { m_pipelined_input_indexes_prefetched.push_back( position - m_pipelined_input_indexes.begin()); } + + // complements + m_pipelined_input_indexes_not_prefetched = FindComplement( + m_pipelined_input_indexes, m_pipelined_input_indexes_prefetched); + m_pipelined_not_prefetched_input_indexes = + FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes); } //--------------------------------------------------------------------------- diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index b22800d16..af2b0f818 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -42,6 +42,12 @@ class NGraphTensorManager { ~NGraphTensorManager(); + 
string GetName() { return m_ng_encap_node_name; } + + int GetClusterId() { return m_ng_encap_cluster_id; } + + int GetGraphId() { return m_ng_encap_graph_id; } + const int& GetNumberOfInputs() { return m_number_of_inputs; } const int& GetNumberOfOutputs() { return m_number_of_outputs; } @@ -98,17 +104,21 @@ class NGraphTensorManager { int m_number_of_outputs; // Book-keeping for weights-on-device optimizations + // indexes wrt all inputs/outputs vector m_input_indexes_from_variables; vector m_output_indexes_assigning_variable; vector m_output_indexes_that_need_copy; - // All indexes that are not for from/to variables + // All indexes that are not from/to variables + // These are pipelined, some of these are also prefetched + // indexes wrt all inputs/outputs vector m_pipelined_input_indexes; vector m_pipelined_output_indexes; + // indexes wrt pipelined inputs vector m_pipelined_input_indexes_prefetched; vector m_pipelined_input_indexes_not_prefetched; - //[TODO] Book-keeping for prefetched inputs + // indexes wrt all inputs vector m_prefetched_input_indexes; vector m_pipelined_not_prefetched_input_indexes; }; diff --git a/ngraph_bridge/ngraph_utils.cc b/ngraph_bridge/ngraph_utils.cc index 686799e94..240ed82c6 100644 --- a/ngraph_bridge/ngraph_utils.cc +++ b/ngraph_bridge/ngraph_utils.cc @@ -57,6 +57,21 @@ vector FindComplement(const int& max_element, return complement; } +// Finds the complement of element_set +// From the superset +// Finds: superset - element_set +// Assumes superset and element_superset are sorted +vector FindComplement(const vector& superset, + const vector& element_set) { + // max size of complement is superset + vector complement(superset.size()); + vector::iterator it = set_difference( + superset.begin(), superset.begin() + superset.size(), element_set.begin(), + element_set.begin() + element_set.size(), complement.begin()); + complement.resize(it - complement.begin()); + return complement; +} + int FindNumberOfNodes(const Graph* graph, 
const string op_type) { int count = 0; for (auto node : graph->nodes()) { diff --git a/ngraph_bridge/ngraph_utils.h b/ngraph_bridge/ngraph_utils.h index 6df01a1dd..daa6cbf5b 100644 --- a/ngraph_bridge/ngraph_utils.h +++ b/ngraph_bridge/ngraph_utils.h @@ -47,6 +47,13 @@ namespace ngraph_bridge { vector FindComplement(const int& max_element, const vector& element_set); +// Finds the complement of element_set +// From the superset +// Finds: superset - element_set +// Assumes superset and element_superset are sorted +vector FindComplement(const vector& element_superset, + const vector& element_set); + int FindNumberOfNodes(const Graph* graph, const string op_type); Status IsNgraphTFLogTensorCopiesEnabled(int graph_id, From 958f62e302bab0162c7e5f057346b1abc7817713 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Tue, 10 Dec 2019 17:38:49 -0800 Subject: [PATCH 17/67] renamed the files --- bazel/BUILD | 4 ++-- ngraph_bridge/CMakeLists.txt | 2 +- ngraph_bridge/ngraph_encapsulate_op.cc | 2 +- ...psulate_get_prefetch.cc => ngraph_encapsulate_op_utils.cc} | 2 +- ...capsulate_get_prefetch.h => ngraph_encapsulate_op_utils.h} | 0 5 files changed, 5 insertions(+), 5 deletions(-) rename ngraph_bridge/{ngraph_encapsulate_get_prefetch.cc => ngraph_encapsulate_op_utils.cc} (99%) rename ngraph_bridge/{ngraph_encapsulate_get_prefetch.h => ngraph_encapsulate_op_utils.h} (100%) diff --git a/bazel/BUILD b/bazel/BUILD index 15964fd1e..6e59a58a3 100644 --- a/bazel/BUILD +++ b/bazel/BUILD @@ -36,9 +36,9 @@ cc_library( "ngraph_bridge/ngraph_enter_prefetch_in_catalog.h", "ngraph_bridge/ngraph_executor.h", "ngraph_bridge/ngraph_encapsulate_op.h", + "ngraph_bridge/ngraph_encapsulate_op_utils.h", "ngraph_bridge/ngraph_data_cache.h", "ngraph_bridge/ngraph_find_replace_prefetchdataset.h", - "ngraph_bridge/ngraph_encapsulate_get_prefetch.h", "ngraph_bridge/ngraph_freshness_tracker.h", "ngraph_bridge/ngraph_mark_for_clustering.h", "ngraph_bridge/ngraph_partial_shapes.h", @@ -80,9 +80,9 @@ 
cc_library( "ngraph_bridge/ngraph_encapsulate_clusters.cc", "ngraph_bridge/ngraph_encapsulate_impl.cc", "ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc", - "ngraph_bridge/ngraph_encapsulate_get_prefetch.cc", "ngraph_bridge/ngraph_executor.cc", "ngraph_bridge/ngraph_encapsulate_op.cc", + "ngraph_bridge/ngraph_encapsulate_op_utils.cc", "ngraph_bridge/ngraph_freshness_tracker.cc", "ngraph_bridge/ngraph_mark_for_clustering.cc", "ngraph_bridge/ngraph_partial_shapes.cc", diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt index f48aeef56..d895a3028 100644 --- a/ngraph_bridge/CMakeLists.txt +++ b/ngraph_bridge/CMakeLists.txt @@ -48,6 +48,7 @@ set(SRC ngraph_executor.cc ops/ngraph_ops.cc ngraph_encapsulate_op.cc + ngraph_encapsulate_op_utils.cc ngraph_freshness_tracker.cc ngraph_mark_for_clustering.cc ngraph_partial_shapes.cc @@ -60,7 +61,6 @@ set(SRC tf_deadness_analysis.cc prefetch_autotuner.cc ngraph_prefetch_dataset_op.cc - ngraph_encapsulate_get_prefetch.cc stats_utils.cc version.cc ) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index c87e83c7e..35328586e 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -40,9 +40,9 @@ #include "ngraph_bridge/ngraph_backend_manager.h" #include "ngraph_bridge/ngraph_builder.h" #include "ngraph_bridge/ngraph_cluster_manager.h" -#include "ngraph_bridge/ngraph_encapsulate_get_prefetch.h" #include "ngraph_bridge/ngraph_encapsulate_impl.h" #include "ngraph_bridge/ngraph_encapsulate_op.h" +#include "ngraph_bridge/ngraph_encapsulate_op_utils.h" #include "ngraph_bridge/ngraph_freshness_tracker.h" #include "ngraph_bridge/ngraph_mark_for_clustering.h" #include "ngraph_bridge/ngraph_pipelined_tensors.h" diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc similarity index 99% rename from ngraph_bridge/ngraph_encapsulate_get_prefetch.cc rename to 
ngraph_bridge/ngraph_encapsulate_op_utils.cc index a8faeecfe..1db2c164e 100644 --- a/ngraph_bridge/ngraph_encapsulate_get_prefetch.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -14,7 +14,7 @@ * limitations under the License. *******************************************************************************/ -#include "ngraph_bridge/ngraph_encapsulate_get_prefetch.h" +#include "ngraph_bridge/ngraph_encapsulate_op_utils.h" #include "ngraph_bridge/ngraph_prefetch_shared_data.h" #include "ngraph_bridge/ngraph_utils.h" diff --git a/ngraph_bridge/ngraph_encapsulate_get_prefetch.h b/ngraph_bridge/ngraph_encapsulate_op_utils.h similarity index 100% rename from ngraph_bridge/ngraph_encapsulate_get_prefetch.h rename to ngraph_bridge/ngraph_encapsulate_op_utils.h From bfef9c08d6c02141cbf7ba4bf1eeee6a63c498a7 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Tue, 10 Dec 2019 17:56:30 -0800 Subject: [PATCH 18/67] Fixed Prefetch Tests --- test/test_enter_prefetch_in_catalog.cc | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/test_enter_prefetch_in_catalog.cc b/test/test_enter_prefetch_in_catalog.cc index 0d4309260..2d23af0c3 100644 --- a/test/test_enter_prefetch_in_catalog.cc +++ b/test/test_enter_prefetch_in_catalog.cc @@ -46,6 +46,12 @@ namespace ngraph_bridge { namespace testing { TEST(PrefetchCatalogTest, SmallGraph1) { + // Set flag to enable prefetch + list env_vars{"NGRAPH_TF_USE_PREFETCH"}; + const unordered_map& env_map = StoreEnv(env_vars); + SetEnvVariable("NGRAPH_TF_USE_PREFETCH", "1"); + + // Create Graph GraphConstructorOptions opts; opts.allow_internal_ops = true; Graph input_graph(OpRegistry::Global()); @@ -72,9 +78,17 @@ TEST(PrefetchCatalogTest, SmallGraph1) { // Clean up NGraphCatalog::ClearCatalog(); + // Unset, Restore env flga + UnsetEnvVariable("NGRAPH_TF_USE_PREFETCH"); + RestoreEnv(env_map); } TEST(PrefetchCatalogTest, SmallGraph2) { + // Set flag to enable prefetch + list env_vars{"NGRAPH_TF_USE_PREFETCH"}; + const 
unordered_map& env_map = StoreEnv(env_vars); + SetEnvVariable("NGRAPH_TF_USE_PREFETCH", "1"); + GraphConstructorOptions opts; opts.allow_internal_ops = true; Graph input_graph(OpRegistry::Global()); @@ -98,6 +112,9 @@ TEST(PrefetchCatalogTest, SmallGraph2) { // Clean up NGraphCatalog::ClearCatalog(); + // Unset, Restore env flga + UnsetEnvVariable("NGRAPH_TF_USE_PREFETCH"); + RestoreEnv(env_map); } } // namespace testing From 065db005ea99ab10c8ee9f8ca8e7b9e812c089a1 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 14:56:07 -0800 Subject: [PATCH 19/67] fixed tests --- ngraph_bridge/ngraph_tensor_manager.cc | 14 ----- ngraph_bridge/ngraph_tensor_manager.h | 3 - test/test_ngraph_tensor_manager.cpp | 83 +++----------------------- 3 files changed, 8 insertions(+), 92 deletions(-) diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index d8b0d6ffb..5a8db0ccb 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -113,19 +113,5 @@ void NGraphTensorManager::Initialize() { //--------------------------------------------------------------------------- NGraphTensorManager::~NGraphTensorManager() {} -//--------------------------------------------------------------------------- -// NGraphTensorManager::GetPrefetchedTensors -//--------------------------------------------------------------------------- -vector> -NGraphTensorManager::GetPrefetchedTensors( - const vector>& pipelined_input_tensors) { - vector> prefetched_tensors; - auto prefetched_indexes = GetPipelinedInputIndexesThatArePrefetched(); - for (auto index : prefetched_indexes) { - prefetched_tensors.push_back(pipelined_input_tensors[index]); - } - return prefetched_tensors; -} - } // namespace ngraph_bridge } // namespace tensorflow \ No newline at end of file diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index af2b0f818..2fe2aa0a7 100644 --- 
a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -92,9 +92,6 @@ class NGraphTensorManager { return m_pipelined_input_indexes_not_prefetched; } - vector> GetPrefetchedTensors( - const vector>& pipelined_input_tensors); - private: void Initialize(); string m_ng_encap_node_name; diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index e5d9056a2..a4eb96ae2 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -129,11 +129,13 @@ TEST_F(NGraphTensorManagerTest, NoVariablesNoPrefetch) { vector empty; vector expected_pipelined_inp_indexes = FillRange(number_of_inputs); vector expected_pipelined_out_indexes = FillRange(number_of_outputs); + vector expected_out_indexes_need_copy = FillRange(number_of_outputs); // var related ASSERT_EQ(empty, tensor_manager.GetInputIndexesFedByVariables()); ASSERT_EQ(empty, tensor_manager.GetOutputIndexesAssigningVariables()); - ASSERT_EQ(empty, tensor_manager.GetOutputIndexesThatNeedCopy()); + ASSERT_EQ(expected_out_indexes_need_copy, + tensor_manager.GetOutputIndexesThatNeedCopy()); ASSERT_EQ(expected_pipelined_inp_indexes, tensor_manager.GetPipelinedInputIndexes()); ASSERT_EQ(expected_pipelined_out_indexes, @@ -180,7 +182,7 @@ TEST_F(NGraphTensorManagerTest, HasVariablesNoPrefetch) { expected_var_inp_indexes = {}; expected_var_out_indexes = {}; - expected_out_indexes_need_copy = {}; + expected_out_indexes_need_copy = FillRange(number_of_outputs); expected_prefetched_inp_indexes = {}; } @@ -220,6 +222,7 @@ TEST_F(NGraphTensorManagerTest, NoVariablesHasPrefetch) { vector empty; vector expected_pipelined_inp_indexes = FillRange(number_of_inputs); vector expected_pipelined_out_indexes = FillRange(number_of_outputs); + vector expected_out_indexes_need_copy = FillRange(number_of_outputs); vector expected_prefetched_inp_indexes = {1, 3}; vector expected_pipelined_inp_indexes_prefetched = { 1, 3}; // as all inputs are pipelined @@ 
-234,7 +237,8 @@ TEST_F(NGraphTensorManagerTest, NoVariablesHasPrefetch) { // var related ASSERT_EQ(empty, tensor_manager.GetInputIndexesFedByVariables()); ASSERT_EQ(empty, tensor_manager.GetOutputIndexesAssigningVariables()); - ASSERT_EQ(empty, tensor_manager.GetOutputIndexesThatNeedCopy()); + ASSERT_EQ(expected_out_indexes_need_copy, + tensor_manager.GetOutputIndexesThatNeedCopy()); ASSERT_EQ(expected_pipelined_inp_indexes, tensor_manager.GetPipelinedInputIndexes()); ASSERT_EQ(expected_pipelined_out_indexes, @@ -289,7 +293,7 @@ TEST_F(NGraphTensorManagerTest, VariablesAndPrefetch) { expected_var_inp_indexes = {}; expected_var_out_indexes = {}; - expected_out_indexes_need_copy = {}; + expected_out_indexes_need_copy = FillRange(number_of_outputs); } EnterPrefetchInCatalog(ng_encap_graph_id, ng_encap_node_name, @@ -341,77 +345,6 @@ TEST_F(NGraphTensorManagerTest, PrefetchNotInPipeline) { ClearCatalog(); } -TEST_F(NGraphTensorManagerTest, GetPrefetchedTensors1) { - string ng_encap_node_name = "xyz_1"; - int ng_encap_cluster_id = 1; - int ng_encap_graph_id = 1; - int number_of_inputs = 7; - int number_of_outputs = 4; - - // expected - vector expected_pipelined_inp_indexes, expected_pipelined_out_indexes, - expected_var_inp_indexes, expected_var_out_indexes, - expected_out_indexes_need_copy, expected_prefetched_inp_indexes, - expected_pipelined_inp_indexes_prefetched; - - if (ngraph_tf_are_variables_enabled()) { - // expected values - expected_pipelined_inp_indexes = {1, 3, 4, 6}; - expected_prefetched_inp_indexes = {3, 6}; - expected_pipelined_inp_indexes_prefetched = {1, 3}; - expected_pipelined_out_indexes = {0, 2}; - expected_var_inp_indexes = - FindComplement(number_of_inputs, expected_pipelined_inp_indexes); - expected_var_out_indexes = - FindComplement(number_of_outputs, expected_pipelined_out_indexes); - expected_out_indexes_need_copy = {2, 3}; - // enter in catalog - EnterVarInCatalog(ng_encap_graph_id, ng_encap_node_name, - expected_var_inp_indexes, 
expected_var_out_indexes, - expected_out_indexes_need_copy); - - } else { - expected_pipelined_inp_indexes = FillRange(number_of_inputs); - expected_pipelined_out_indexes = FillRange(number_of_outputs); - expected_prefetched_inp_indexes = {3, 6}; - expected_pipelined_inp_indexes_prefetched = { - 3, 6}; // all inputs are pipelined - - expected_var_inp_indexes = {}; - expected_var_out_indexes = {}; - expected_out_indexes_need_copy = {}; - } - - EnterPrefetchInCatalog(ng_encap_graph_id, ng_encap_node_name, - expected_prefetched_inp_indexes); - - NGraphTensorManager tensor_manager(ng_encap_node_name, ng_encap_cluster_id, - ng_encap_graph_id, number_of_inputs, - number_of_outputs); - - vector> pipelined_input_tensors( - expected_pipelined_inp_indexes.size()); - - for (int i = 0; i < pipelined_input_tensors.size(); i++) { - pipelined_input_tensors[i] = - CreateNGraphScalarTensor(expected_pipelined_inp_indexes[i]); - } - - vector> prefetched_input_tensors = - tensor_manager.GetPrefetchedTensors(pipelined_input_tensors); - ASSERT_EQ(prefetched_input_tensors.size(), - expected_prefetched_inp_indexes.size()); - - for (int i = 0; i < expected_prefetched_inp_indexes.size(); i++) { - int tensor_val = 0; - prefetched_input_tensors[i]->read(&tensor_val, sizeof(tensor_val)); - ASSERT_EQ(tensor_val, expected_prefetched_inp_indexes[i]); - } - - // clean up - ClearCatalog(); -} - } // namespace testing } // namespace ngraph_bridge } // namespace tensorflow \ No newline at end of file From 30fec1664c6b3b12d50019e40e4d005848ba5aee Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 16:07:03 -0800 Subject: [PATCH 20/67] Removed couts --- ngraph_bridge/ngraph_catalog.cc | 4 ---- ngraph_bridge/ngraph_encapsulate_op.cc | 1 - ngraph_bridge/ngraph_encapsulate_op_utils.cc | 13 ------------- ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc | 4 ---- ngraph_bridge/ngraph_tensor_manager.cc | 8 -------- 5 files changed, 30 deletions(-) diff --git a/ngraph_bridge/ngraph_catalog.cc 
b/ngraph_bridge/ngraph_catalog.cc index fdb70790d..95b65f506 100644 --- a/ngraph_bridge/ngraph_catalog.cc +++ b/ngraph_bridge/ngraph_catalog.cc @@ -220,21 +220,17 @@ void NGraphCatalog::AddToPrefetchedInputIndexMap( throw runtime_error("Trying to add an already existing key ( " + key + " ) in PrefetchedInputIndexMap "); } - cout << " AddToPrefetchedInputIndexMap key " << key << endl; NGraphCatalog::prefetched_input_index_map_.insert({key, val}); } bool NGraphCatalog::ExistsInPrefetchedInputIndexMap(const int& graphid, const string& node_name) { string key = NGraphCatalog::CreateNodeKey(graphid, node_name); - cout << " ExistsInPrefetchedInputIndexMap key " << key << endl; return NGraphCatalog::ExistsInPrefetchedInputIndexMap(key); } bool NGraphCatalog::ExistsInPrefetchedInputIndexMap(const string& key) { auto itr = NGraphCatalog::prefetched_input_index_map_.find(key); - cout << " ExistsInPrefetchedInputIndexMap check " - << (itr != NGraphCatalog::prefetched_input_index_map_.end()) << endl; return itr != NGraphCatalog::prefetched_input_index_map_.end(); } diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 35328586e..72bc9bff7 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -418,7 +418,6 @@ void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { // ComputeUsingParallelExecutor //--------------------------------------------------------------------------- void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { - cout << "using parallel exec " << endl; // TF input tensors std::vector tf_input_tensors; diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index 1db2c164e..55faae905 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -59,7 +59,6 @@ Status GetPipelinedIOTensorsReadyForExecution( bool skip_tf2ng_copy = false; if 
(std::getenv(NGraphPrefetchSharedResouce::NGRAPH_TF_USE_PREFETCH) != nullptr) { - cout << "using prefetch env flag " << endl; // Set the prefetch shared obj if applicable NGraphPrefetchSharedResouce* shared_data = nullptr; Status s = ctx->resource_manager()->Lookup( @@ -75,12 +74,7 @@ Status GetPipelinedIOTensorsReadyForExecution( // prefetched inputs to device auto ng_prefetch_input_indexes = tensor_manager->GetPipelinedInputIndexesThatArePrefetched(); - cout << "ng_prefetch_input_indexes " << ng_prefetch_input_indexes.size() - << endl; - for (auto inp : ng_prefetch_input_indexes) { - cout << " inp indez " << inp << endl; - } shared_data = new NGraphPrefetchSharedResouce( tensor_manager->GetName(), tensor_manager->GetGraphId(), tensor_manager->GetClusterId(), ng_prefetch_input_indexes); @@ -113,14 +107,11 @@ Status GetPipelinedIOTensorsReadyForExecution( NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " "signal prefetching"; } else { - cout << "using prefetch inputs " << endl; - int prefetch_buffer_depth = shared_data->GetBufferDepth(); int skip_count = shared_data->GetSkipCount(); NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth << " skip count; " << skip_count; if (skip_count >= prefetch_buffer_depth) { - cout << "skip_tf2ng_copy true " << endl; // We have been using the pipelined tensors - therefore do the // following: // 1. 
Save the prefetched Input/Output tensors for the current iteration @@ -164,9 +155,7 @@ Status GetPipelinedIOTensorsReadyForExecution( // All pipelined inputs are copied for (auto i = 0; i < pipelined_input_indexes.size(); i++) { - cout << "copying inputs true " << endl; int tf_index = pipelined_input_indexes[i]; - cout << "tf index " << tf_index << "ng index " << i << endl; ng::element::Type ng_element_type; TF_RETURN_IF_ERROR(TFDataTypeToNGraphElementType( @@ -199,11 +188,9 @@ Status GetPipelinedIOTensorsReadyForExecution( // Gives the mapping for corresponding for (auto i = 0; i < pipelined_input_indexes_not_prefetched.size(); i++) { - cout << "copying some inputs true " << endl; int tf_index = pipelined_not_prefetched_input_indexes[i]; int ng_index = pipelined_input_indexes_not_prefetched[i]; ng::element::Type ng_element_type; - cout << "tf index " << tf_index << " ng_index " << ng_index << endl; TF_RETURN_IF_ERROR(TFDataTypeToNGraphElementType( tf_input_tensors[tf_index].dtype(), &ng_element_type)); void* current_src_ptr = diff --git a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc index 8ff3bbd6e..75d96e954 100644 --- a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc +++ b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.cc @@ -69,10 +69,6 @@ Status EnterPrefetchInCatalog(Graph* graph, int graph_id) { } // end loop over input edges if (in_indexes_for_encap.size() > 0) { - for (auto i : in_indexes_for_encap) { - cout << "Enter Prefetch in catalog " << i << endl; - } - try { NGraphCatalog::AddToPrefetchedInputIndexMap(graph_id, node->name(), in_indexes_for_encap); diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 5a8db0ccb..7255a0081 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -68,24 +68,16 @@ void NGraphTensorManager::Initialize() { m_pipelined_output_indexes = FindComplement(m_number_of_outputs, 
m_output_indexes_assigning_variable); - cout << "TM m_ng_encap_graph_id " << m_ng_encap_graph_id << endl; - cout << "TM m_ng_encap_node_name " << m_ng_encap_node_name << endl; if (NGraphCatalog::ExistsInPrefetchedInputIndexMap(m_ng_encap_graph_id, m_ng_encap_node_name)) { auto prefetch_indexes = NGraphCatalog::GetIndexesFromPrefetchedInputIndexMap( m_ng_encap_graph_id, m_ng_encap_node_name); - for (auto i : m_prefetched_input_indexes) { - cout << "TM " << i << endl; - } m_prefetched_input_indexes.insert(m_prefetched_input_indexes.begin(), prefetch_indexes.begin(), prefetch_indexes.end()); // keeping the indexes sorted, is helpful in general testing sort(m_prefetched_input_indexes.begin(), m_prefetched_input_indexes.end()); - for (auto i : m_prefetched_input_indexes) { - cout << "TM " << i << endl; - } } // the prefetched input indexes will also be pipelined From 5b0bd525576fc7a1076e463ed1d80f6973c12c49 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 16:12:19 -0800 Subject: [PATCH 21/67] minor --- ngraph_bridge/ngraph_deassign_clusters.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ngraph_bridge/ngraph_deassign_clusters.cc b/ngraph_bridge/ngraph_deassign_clusters.cc index 3d4a1bc4d..2f51b3650 100644 --- a/ngraph_bridge/ngraph_deassign_clusters.cc +++ b/ngraph_bridge/ngraph_deassign_clusters.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*******************************************************************************/ - #include #include #include From bface4032cd24f32f936edfe4e517e69e14e881e Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 17:07:30 -0800 Subject: [PATCH 22/67] fixed tests --- test/test_ngraph_tensor_manager.cpp | 134 +++++++++++++++++++++------- 1 file changed, 102 insertions(+), 32 deletions(-) diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index a4eb96ae2..0efba0163 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -82,21 +82,6 @@ class NGraphTensorManagerTest : public ::testing::Test { iota(vout.begin(), vout.end(), 0); return vout; } - - // Creates ngraph tensor - shared_ptr CreateNGraphScalarTensor( - int value, string backend_type = "INTERPRETER") { - // create scalar tensor - ng::Shape ng_shape_scalar({}); - - // create Backend - auto backend = ng::runtime::Backend::create(backend_type); - - auto temp = backend->create_tensor(ng::element::i32, ng_shape_scalar); - - temp->write(&value, sizeof(value)); - return temp; - } }; TEST(NGraphUtils, FindComplement1) { @@ -136,6 +121,7 @@ TEST_F(NGraphTensorManagerTest, NoVariablesNoPrefetch) { ASSERT_EQ(empty, tensor_manager.GetOutputIndexesAssigningVariables()); ASSERT_EQ(expected_out_indexes_need_copy, tensor_manager.GetOutputIndexesThatNeedCopy()); + // piplined ASSERT_EQ(expected_pipelined_inp_indexes, tensor_manager.GetPipelinedInputIndexes()); ASSERT_EQ(expected_pipelined_out_indexes, @@ -143,6 +129,13 @@ TEST_F(NGraphTensorManagerTest, NoVariablesNoPrefetch) { // prefetched ASSERT_EQ(empty, tensor_manager.GetPrefetchedInputIndexes()); + ASSERT_EQ(expected_pipelined_inp_indexes, + tensor_manager.GetPipelinedButNotPrefetchedInputIndexes()); + + // prefetched wrt pipelined + ASSERT_EQ(empty, tensor_manager.GetPipelinedInputIndexesThatArePrefetched()); + ASSERT_EQ(expected_pipelined_inp_indexes, + 
tensor_manager.GetPipelinedInputIndexesThatAreNotPrefetched()); } // Tests scenario when the graph has variables but no prefetched inputs @@ -158,18 +151,31 @@ TEST_F(NGraphTensorManagerTest, HasVariablesNoPrefetch) { // expected vector expected_pipelined_inp_indexes, expected_pipelined_out_indexes, expected_var_inp_indexes, expected_var_out_indexes, - expected_out_indexes_need_copy, expected_prefetched_inp_indexes; + expected_out_indexes_need_copy, expected_prefetched_inp_indexes, + expected_pipelined_not_prefetched_input_indexes, + expected_pipelined_input_indexes_prefetched, + expected_pipelined_input_indexes_not_prefetched; + // expected values if (ngraph_tf_are_variables_enabled()) { - // expected values + // pipelined expected_pipelined_inp_indexes = {1, 3, 4}; expected_pipelined_out_indexes = {1}; + // var expected_var_inp_indexes = FindComplement(number_of_inputs, expected_pipelined_inp_indexes); expected_var_out_indexes = FindComplement(number_of_outputs, expected_pipelined_out_indexes); expected_out_indexes_need_copy = {1}; + + // prefetched expected_prefetched_inp_indexes = {}; + expected_pipelined_not_prefetched_input_indexes = + expected_pipelined_inp_indexes; + + // prefetched relative to pipelined tensors + expected_pipelined_input_indexes_prefetched = {}; + expected_pipelined_input_indexes_not_prefetched = {0, 1, 2}; // enter in catalog EnterVarInCatalog(ng_encap_graph_id, ng_encap_node_name, @@ -177,19 +183,29 @@ TEST_F(NGraphTensorManagerTest, HasVariablesNoPrefetch) { expected_out_indexes_need_copy); } else { + // pipelined expected_pipelined_inp_indexes = FillRange(number_of_inputs); expected_pipelined_out_indexes = FillRange(number_of_outputs); - + // var expected_var_inp_indexes = {}; expected_var_out_indexes = {}; expected_out_indexes_need_copy = FillRange(number_of_outputs); + // prefetched expected_prefetched_inp_indexes = {}; + expected_pipelined_not_prefetched_input_indexes = + expected_pipelined_inp_indexes; + + // prefetched relative to 
pipelined tensors + expected_pipelined_input_indexes_prefetched = {}; + expected_pipelined_input_indexes_not_prefetched = + expected_pipelined_not_prefetched_input_indexes; } NGraphTensorManager tensor_manager(ng_encap_node_name, ng_encap_cluster_id, ng_encap_graph_id, number_of_inputs, number_of_outputs); + // var ASSERT_EQ(expected_var_inp_indexes, tensor_manager.GetInputIndexesFedByVariables()); ASSERT_EQ(expected_var_out_indexes, @@ -197,14 +213,24 @@ TEST_F(NGraphTensorManagerTest, HasVariablesNoPrefetch) { ASSERT_EQ(expected_out_indexes_need_copy, tensor_manager.GetOutputIndexesThatNeedCopy()); - ASSERT_EQ(expected_prefetched_inp_indexes, - tensor_manager.GetPrefetchedInputIndexes()); - + // pipelined ASSERT_EQ(expected_pipelined_inp_indexes, tensor_manager.GetPipelinedInputIndexes()); ASSERT_EQ(expected_pipelined_out_indexes, tensor_manager.GetPipelinedOutputIndexes()); + // prefetched + ASSERT_EQ(expected_prefetched_inp_indexes, + tensor_manager.GetPrefetchedInputIndexes()); + ASSERT_EQ(expected_pipelined_not_prefetched_input_indexes, + tensor_manager.GetPipelinedButNotPrefetchedInputIndexes()); + + // prefetched wrt pipelined + ASSERT_EQ(expected_pipelined_input_indexes_prefetched, + tensor_manager.GetPipelinedInputIndexesThatArePrefetched()); + ASSERT_EQ(expected_pipelined_input_indexes_not_prefetched, + tensor_manager.GetPipelinedInputIndexesThatAreNotPrefetched()); + // clean up ClearCatalog(); } @@ -219,13 +245,24 @@ TEST_F(NGraphTensorManagerTest, NoVariablesHasPrefetch) { int number_of_outputs = 2; // expected + // var vector empty; + vector expected_out_indexes_need_copy = FillRange(number_of_outputs); + + // pipelined vector expected_pipelined_inp_indexes = FillRange(number_of_inputs); vector expected_pipelined_out_indexes = FillRange(number_of_outputs); - vector expected_out_indexes_need_copy = FillRange(number_of_outputs); + + // prefetched vector expected_prefetched_inp_indexes = {1, 3}; - vector expected_pipelined_inp_indexes_prefetched = { - 
1, 3}; // as all inputs are pipelined + vector expected_pipelined_not_prefetched_input_indexes = {0, 2, 4}; + + // relative to pipelined tensors + // all pipelined are prefetched + vector expected_pipelined_input_indexes_prefetched = + expected_prefetched_inp_indexes; + vector expected_pipelined_input_indexes_not_prefetched = + expected_pipelined_not_prefetched_input_indexes; EnterPrefetchInCatalog(ng_encap_graph_id, ng_encap_node_name, expected_prefetched_inp_indexes); @@ -239,6 +276,7 @@ TEST_F(NGraphTensorManagerTest, NoVariablesHasPrefetch) { ASSERT_EQ(empty, tensor_manager.GetOutputIndexesAssigningVariables()); ASSERT_EQ(expected_out_indexes_need_copy, tensor_manager.GetOutputIndexesThatNeedCopy()); + // pipelined ASSERT_EQ(expected_pipelined_inp_indexes, tensor_manager.GetPipelinedInputIndexes()); ASSERT_EQ(expected_pipelined_out_indexes, @@ -247,9 +285,14 @@ TEST_F(NGraphTensorManagerTest, NoVariablesHasPrefetch) { // prefetched ASSERT_EQ(expected_prefetched_inp_indexes, tensor_manager.GetPrefetchedInputIndexes()); - ASSERT_EQ(expected_pipelined_inp_indexes_prefetched, - tensor_manager.GetPipelinedInputIndexesThatArePrefetched()); + ASSERT_EQ(expected_pipelined_not_prefetched_input_indexes, + tensor_manager.GetPipelinedButNotPrefetchedInputIndexes()); + // prefetched wrt pipelined + ASSERT_EQ(expected_pipelined_input_indexes_prefetched, + tensor_manager.GetPipelinedInputIndexesThatArePrefetched()); + ASSERT_EQ(expected_pipelined_input_indexes_not_prefetched, + tensor_manager.GetPipelinedInputIndexesThatAreNotPrefetched()); // clean up ClearCatalog(); } @@ -266,34 +309,53 @@ TEST_F(NGraphTensorManagerTest, VariablesAndPrefetch) { vector expected_pipelined_inp_indexes, expected_pipelined_out_indexes, expected_var_inp_indexes, expected_var_out_indexes, expected_out_indexes_need_copy, expected_prefetched_inp_indexes, - expected_pipelined_inp_indexes_prefetched; + expected_pipelined_not_prefetched_input_indexes, + expected_pipelined_inp_indexes_prefetched, + 
expected_pipelined_inp_indexes_not_prefetched; if (ngraph_tf_are_variables_enabled()) { // expected values + // pipelined expected_pipelined_inp_indexes = {1, 3, 4, 6}; - expected_prefetched_inp_indexes = {3, 6}; - expected_pipelined_inp_indexes_prefetched = {1, 3}; expected_pipelined_out_indexes = {0, 2}; + // var expected_var_inp_indexes = FindComplement(number_of_inputs, expected_pipelined_inp_indexes); expected_var_out_indexes = FindComplement(number_of_outputs, expected_pipelined_out_indexes); expected_out_indexes_need_copy = {2, 3}; + + // prefetched + expected_prefetched_inp_indexes = {3, 6}; + expected_pipelined_not_prefetched_input_indexes = {1, 4}; + + expected_pipelined_inp_indexes_prefetched = {1, 3}; + expected_pipelined_inp_indexes_not_prefetched = {0, 2}; + // enter in catalog EnterVarInCatalog(ng_encap_graph_id, ng_encap_node_name, expected_var_inp_indexes, expected_var_out_indexes, expected_out_indexes_need_copy); } else { + // pipelined expected_pipelined_inp_indexes = FillRange(number_of_inputs); expected_pipelined_out_indexes = FillRange(number_of_outputs); - expected_prefetched_inp_indexes = {3, 6}; - expected_pipelined_inp_indexes_prefetched = { - 3, 6}; // all inputs are pipelined + // var expected_var_inp_indexes = {}; expected_var_out_indexes = {}; expected_out_indexes_need_copy = FillRange(number_of_outputs); + + // prefetched + expected_prefetched_inp_indexes = {3, 6}; + expected_pipelined_not_prefetched_input_indexes = {0, 1, 2, 4, 5}; + + // prefetched wrt to pipelining + expected_pipelined_inp_indexes_prefetched = + expected_prefetched_inp_indexes; // all inputs are pipelined + expected_pipelined_inp_indexes_not_prefetched = + expected_pipelined_not_prefetched_input_indexes; } EnterPrefetchInCatalog(ng_encap_graph_id, ng_encap_node_name, @@ -310,6 +372,7 @@ TEST_F(NGraphTensorManagerTest, VariablesAndPrefetch) { tensor_manager.GetOutputIndexesAssigningVariables()); ASSERT_EQ(expected_out_indexes_need_copy, 
tensor_manager.GetOutputIndexesThatNeedCopy()); + // pipelined ASSERT_EQ(expected_pipelined_inp_indexes, tensor_manager.GetPipelinedInputIndexes()); ASSERT_EQ(expected_pipelined_out_indexes, @@ -318,8 +381,15 @@ TEST_F(NGraphTensorManagerTest, VariablesAndPrefetch) { // prefetched ASSERT_EQ(expected_prefetched_inp_indexes, tensor_manager.GetPrefetchedInputIndexes()); + ASSERT_EQ(expected_pipelined_not_prefetched_input_indexes, + tensor_manager.GetPipelinedButNotPrefetchedInputIndexes()); + + // prefetched wrt pipelined ASSERT_EQ(expected_pipelined_inp_indexes_prefetched, tensor_manager.GetPipelinedInputIndexesThatArePrefetched()); + ASSERT_EQ(expected_pipelined_inp_indexes_not_prefetched, + tensor_manager.GetPipelinedInputIndexesThatAreNotPrefetched()); + // clean up ClearCatalog(); } From d7a735f8a7cea966d57cf60be1f0a82037156d1b Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 19:04:50 -0800 Subject: [PATCH 23/67] added log --- examples/CMakeLists.txt | 6 +++ examples/axpy_pipelined_extended.py | 53 +++++++++++++++++-- ngraph_bridge/ngraph_encapsulate_op.cc | 6 ++- .../ngraph_find_replace_prefetchdataset.h | 2 +- test/python/test_axpy_pipelined.py | 10 ++-- 5 files changed, 66 insertions(+), 11 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index fcf41fd07..b7c73cdc1 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -26,4 +26,10 @@ execute_process( COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/axpy_pipelined.py ${CMAKE_CURRENT_BINARY_DIR}/axpy_pipelined.py +) + +execute_process( + COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/axpy_pipelined_extended.py + ${CMAKE_CURRENT_BINARY_DIR}/axpy_pipelined_extended.py ) \ No newline at end of file diff --git a/examples/axpy_pipelined_extended.py b/examples/axpy_pipelined_extended.py index 51d3d95e8..5c06b6d2e 100644 --- a/examples/axpy_pipelined_extended.py +++ b/examples/axpy_pipelined_extended.py @@ 
-41,6 +41,21 @@ def build_simple_model(input_array, c1, c2): return output, pl +def build_simple_model2(input_array, c1, c2): + # Convert the numpy array to TF Tensor + input_f = tf.cast(input_array, tf.float32) + + # Define the Ops + pl = tf.placeholder(dtype=dtypes.int32) + pl_f = tf.cast(pl, tf.float32) + pl1 = tf.placeholder(dtype=dtypes.int32) + pl1_f = tf.cast(pl1, tf.float32) + mul = tf.compat.v1.math.multiply(pl1_f, input_f) + add = tf.compat.v1.math.add(mul, c1) + sub = add - pl_f + output = sub + c2 + return output, pl, pl1 + def build_data_pipeline(input_array, map_function, batch_size): dataset = (tf.data.Dataset.from_tensor_slices( (tf.constant(input_array) @@ -52,7 +67,7 @@ def build_data_pipeline(input_array, map_function, batch_size): return data_to_be_prefetched_and_used, iterator -def run_axpy_pipeline(): +def run_axpy_pipeline_extended(): input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] expected_output_array = [-1, -1, 1, -1, -1, -1, -1, -1, -1] output_array = [0, 0, 0, 0, 0, 0, 0, 0, 0] @@ -84,8 +99,41 @@ def run_axpy_pipeline(): return input_array, output_array, expected_output_array + +def run_axpy_pipeline_extended2(): + input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] + expected_output_array = [-1, -1, 1, -1, -1, -1, -1, -1, -1] + output_array = [0, 0, 0, 0, 0, 0, 0, 0, 0] + map_multiplier = 10 + + map_function = lambda x: x * map_multiplier + batch_size = 1 + pipeline, iterator = build_data_pipeline(input_array, map_function, + batch_size) + + # some constants + c1 = 5.0 + c2 = 10.0 + model, pl1, pl2 = build_simple_model2(pipeline, c1, c2) + + with tf.Session() as sess: + # Initialize the globals and the dataset + sess.run(iterator.initializer) + + for i in range(1, 10): + # Expected value is: + # Change it to run on TF if the model gets too complex + expected_output_array[i - 1] = ( + (input_array[i - 1] * map_multiplier) * (i+4)) + c1 -i + c2 + + # Run one iteration + output = sess.run(model, feed_dict={pl1: i, pl2: (i+4)}) + output_array[i - 1] = 
output[0] + return input_array, output_array, expected_output_array + + def main(_): - input_array, output_array, expected_output_array = run_axpy_pipeline() + input_array, output_array, expected_output_array = run_axpy_pipeline_extended() for i in range(1, 10): print("Iteration:", i, " Input: ", input_array[i - 1], " Output: ", output_array[i - 1], " Expected: ", expected_output_array[i - 1]) @@ -93,6 +141,5 @@ def main(_): if __name__ == '__main__': - os.environ['NGRAPH_TF_BACKEND'] = "INTERPRETER" #os.environ['NGRAPH_TF_USE_PREFETCH'] = "1" tf.app.run(main=main) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 72bc9bff7..5938bcc87 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -114,6 +114,7 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) //--------------------------------------------------------------------------- void NGraphEncapsulateOp::CreateParallelExecutor(OpKernelConstruction* ctx, const string& backend_name) { + NGRAPH_VLOG(1)<<"Create Parallel Executor"< encap_subgraph(new Graph(OpRegistry::Global())); @@ -185,6 +186,7 @@ void NGraphEncapsulateOp::CreateParallelExecutor(OpKernelConstruction* ctx, //--------------------------------------------------------------------------- void NGraphEncapsulateOp::CreateLegacyExecutor(OpKernelConstruction* ctx, const string& backend_name) { + NGRAPH_VLOG(1)<<"Create Legacy Executor"< tf_input_tensors; @@ -600,6 +603,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { // ComputeUsingLegacyExecutor //--------------------------------------------------------------------------- void NGraphEncapsulateOp::ComputeUsingLegacyExecutor(OpKernelContext* ctx) { + NGRAPH_VLOG(1)<<"Compute using Legacy Executor"<set_assigned_device_name(prefetch_node->assigned_device_name()); - string new_name = graph->NewName("NGraph" + prefetch_node->name()); + string new_name = 
graph->NewName("NGraph_" + prefetch_node->name()); replacement->set_name(new_name); std::vector edges; diff --git a/test/python/test_axpy_pipelined.py b/test/python/test_axpy_pipelined.py index 2c769edec..6046fc94e 100644 --- a/test/python/test_axpy_pipelined.py +++ b/test/python/test_axpy_pipelined.py @@ -13,19 +13,18 @@ # For eg. when running the test from ngraph-bridge/build_cmake/test/python # you can add this path as below -#sys.path.insert(0, '../../examples') +sys.path.insert(0, '../../examples') from axpy_pipelined import * +from axpy_pipelined_extended import * class TestAxpyPipelined(NgraphTest): def test_axpy_pipelined(self): prefetch_env = "NGRAPH_TF_USE_PREFETCH" - ngraph_backend_i = "NGRAPH_TF_BACKEND" - env_var_map = self.store_env_variables([prefetch_env, ngraph_backend_i]) + env_var_map = self.store_env_variables([prefetch_env]) self.set_env_variable(prefetch_env, "1") - self.set_env_variable(ngraph_backend_i, "INTERPRETER") input_array, output_array, expected_output_array = run_axpy_pipeline() for i in range(1, 10): print("Iteration:", i, " Input: ", input_array[i - 1], " Output: ", @@ -36,5 +35,4 @@ def test_axpy_pipelined(self): output_array[i - 1], expected_output_array[i - 1], atol=1e-3), "Output and expected output values don't match" self.unset_env_variable(prefetch_env) - self.unset_env_variable(ngraph_backend_i) - self.restore_env_variables(env_var_map) + self.restore_env_variables(env_var_map) \ No newline at end of file From fa301de963f31edb779dbf7c421220dbe67e6d1d Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 19:52:25 -0800 Subject: [PATCH 24/67] Added logs --- ngraph_bridge/ngraph_encapsulate_op.cc | 10 +++++----- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 5938bcc87..631531e7c 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ 
b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -114,7 +114,7 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) //--------------------------------------------------------------------------- void NGraphEncapsulateOp::CreateParallelExecutor(OpKernelConstruction* ctx, const string& backend_name) { - NGRAPH_VLOG(1)<<"Create Parallel Executor"< encap_subgraph(new Graph(OpRegistry::Global())); @@ -186,7 +186,7 @@ void NGraphEncapsulateOp::CreateParallelExecutor(OpKernelConstruction* ctx, //--------------------------------------------------------------------------- void NGraphEncapsulateOp::CreateLegacyExecutor(OpKernelConstruction* ctx, const string& backend_name) { - NGRAPH_VLOG(1)<<"Create Legacy Executor"< tf_input_tensors; @@ -596,14 +596,14 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { event_return_tensor.Stop(); ngraph::Event::write_trace(event_return_tensor); - NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Done"; + NGRAPH_VLOG(2) << "COMPUTE: Done "<< name(); } //--------------------------------------------------------------------------- // ComputeUsingLegacyExecutor //--------------------------------------------------------------------------- void NGraphEncapsulateOp::ComputeUsingLegacyExecutor(OpKernelContext* ctx) { - NGRAPH_VLOG(1)<<"Compute using Legacy Executor"<resource_manager()->Lookup( @@ -107,6 +108,7 @@ Status GetPipelinedIOTensorsReadyForExecution( NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " "signal prefetching"; } else { + int prefetch_buffer_depth = shared_data->GetBufferDepth(); int skip_count = shared_data->GetSkipCount(); NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth From 366c78fe90433610c85176067b54f4d504dd0003 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 19:53:12 -0800 Subject: [PATCH 25/67] Added prefetch test --- test/python/test_prefetched.py | 140 +++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) 
create mode 100644 test/python/test_prefetched.py diff --git a/test/python/test_prefetched.py b/test/python/test_prefetched.py new file mode 100644 index 000000000..3eab28c4f --- /dev/null +++ b/test/python/test_prefetched.py @@ -0,0 +1,140 @@ +import sys +import pytest +import getpass +import tensorflow as tf +from tensorflow.python.framework import dtypes +tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) + +import ngraph_bridge + +import numpy as np +from common import NgraphTest + +import warnings +warnings.filterwarnings('ignore', category=FutureWarning) + +import ngraph_bridge + +class TestPrefetched(NgraphTest): + def build_data_pipeline(self,input_array, map_function, batch_size): + dataset = (tf.data.Dataset.from_tensor_slices( + (tf.constant(input_array) + )).map(map_function).batch(batch_size).prefetch(1)) + + iterator = dataset.make_initializable_iterator() + data_to_be_prefetched_and_used = iterator.get_next() + return data_to_be_prefetched_and_used, iterator + + + def build_model1(self,input_array, c1, c2): + # Convert the numpy array to TF Tensor + input_f = tf.cast(input_array, tf.float32) + + # Define the Ops + pl1 = tf.placeholder(dtype=dtypes.int32) + pl1_f = tf.cast(pl1, tf.float32) + pl2 = tf.placeholder(dtype=dtypes.int32) + pl2_f = tf.cast(pl2, tf.float32) + + mul = tf.compat.v1.math.multiply(input_f, c1) + add = tf.compat.v1.math.add(mul, pl2_f) + add2 = add + pl1_f + output = add2 - c2 + return output, pl1, pl2 + + def build_model2(self,input_array, c1, c2): + # Convert the numpy array to TF Tensor + input_f = tf.cast(input_array, tf.float32) + + # Define the Ops + pl1 = tf.placeholder(dtype=dtypes.int32) + pl1_f = tf.cast(pl1, tf.float32) + pl2 = tf.placeholder(dtype=dtypes.int32) + pl2_f = tf.cast(pl2, tf.float32) + + mul = tf.compat.v1.math.multiply(pl2_f, input_f) + add = tf.compat.v1.math.add(mul, c2) + add2 = add + pl1_f * c1 + output = add2 + return output, pl1, pl2 + + + def __run_test(self, pipeline_creator, model): + 
# build model + input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] + map_multiplier = 10 + map_function = lambda x: x * map_multiplier + batch_size = 1 + pipeline, iterator = pipeline_creator(input_array, map_function, + batch_size) + + # some constants + c1 = 5.0 + c2 = 10.0 + model, pl1, pl2 = model(pipeline, c1, c2) + + outputs=[] + + sess = tf.Session() + + # Initialize the globals and the dataset + sess.run(iterator.initializer) + + for i in range(1, 10): + output = sess.run(model, feed_dict={pl1: i, pl2: i+3}) + outputs.append(output) + + return outputs + + + # test hangs when "NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS" is set + def test_prefetch1(self): + # set flags + prefetch_env = "NGRAPH_TF_USE_PREFETCH" + env_var_map = self.store_env_variables([prefetch_env]) + self.set_env_variable(prefetch_env, "1") + + # Run on nGraph + ng_outputs = self.__run_test(self.build_data_pipeline,self.build_model1) + + # Reset Graph + tf.reset_default_graph() + + # Run on TF + disable_tf="NGRAPH_TF_DISABLE" + self.set_env_variable(disable_tf, "1") + tf_outputs = self.__run_test(self.build_data_pipeline,self.build_model1) + + # Compare Values + assert np.allclose(ng_outputs, tf_outputs) + + # unset env variable + self.unset_env_variable(prefetch_env) + self.unset_env_variable(disable_tf) + self.restore_env_variables(env_var_map) + + + def test_prefetch2(self): + # set flags + prefetch_env = "NGRAPH_TF_USE_PREFETCH" + env_var_map = self.store_env_variables([prefetch_env]) + self.set_env_variable(prefetch_env, "1") + + # Run on nGraph + ng_outputs = self.__run_test(self.build_data_pipeline, self.build_model2) + + # Reset Graph + tf.reset_default_graph() + + # Run on TF + disable_tf="NGRAPH_TF_DISABLE" + self.set_env_variable(disable_tf, "1") + tf_outputs = self.__run_test(self.build_data_pipeline, self.build_model2) + + # Compare Values + assert np.allclose(ng_outputs, tf_outputs) + + # unset env variable + self.unset_env_variable(prefetch_env) + self.unset_env_variable(disable_tf) + 
self.restore_env_variables(env_var_map) From f052c6ddef65bc60677b8d5f42c8aec5fee48e18 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 19:56:30 -0800 Subject: [PATCH 26/67] Format, removed extended file --- examples/axpy_pipelined_extended.py | 145 ------------------- ngraph_bridge/ngraph_encapsulate_op.cc | 10 +- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 3 +- test/python/test_prefetched.py | 52 +++---- 4 files changed, 33 insertions(+), 177 deletions(-) delete mode 100644 examples/axpy_pipelined_extended.py diff --git a/examples/axpy_pipelined_extended.py b/examples/axpy_pipelined_extended.py deleted file mode 100644 index 5c06b6d2e..000000000 --- a/examples/axpy_pipelined_extended.py +++ /dev/null @@ -1,145 +0,0 @@ -# ============================================================================== -# Copyright 2019 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -import warnings -warnings.filterwarnings('ignore', category=FutureWarning) -import numpy as np - -import tensorflow as tf -from tensorflow.python.framework import dtypes -tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) - -import os -import ngraph_bridge - -import sys - - -def build_simple_model(input_array, c1, c2): - # Convert the numpy array to TF Tensor - input_f = tf.cast(input_array, tf.float32) - - # Define the Ops - pl = tf.placeholder(dtype=dtypes.int32) - pl_f = tf.cast(pl, tf.float32) - mul = tf.compat.v1.math.multiply(input_f, c1) - add = tf.compat.v1.math.add(mul, c2) - add2 = add + pl_f - output = add2 - return output, pl - - -def build_simple_model2(input_array, c1, c2): - # Convert the numpy array to TF Tensor - input_f = tf.cast(input_array, tf.float32) - - # Define the Ops - pl = tf.placeholder(dtype=dtypes.int32) - pl_f = tf.cast(pl, tf.float32) - pl1 = tf.placeholder(dtype=dtypes.int32) - pl1_f = tf.cast(pl1, tf.float32) - mul = tf.compat.v1.math.multiply(pl1_f, input_f) - add = tf.compat.v1.math.add(mul, c1) - sub = add - pl_f - output = sub + c2 - return output, pl, pl1 - -def build_data_pipeline(input_array, map_function, batch_size): - dataset = (tf.data.Dataset.from_tensor_slices( - (tf.constant(input_array) - )).map(map_function).batch(batch_size).prefetch(1)) - - iterator = dataset.make_initializable_iterator() - data_to_be_prefetched_and_used = iterator.get_next() - - return data_to_be_prefetched_and_used, iterator - - -def run_axpy_pipeline_extended(): - input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] - expected_output_array = [-1, -1, 1, -1, -1, -1, -1, -1, -1] - output_array = [0, 0, 0, 0, 0, 0, 0, 0, 0] - map_multiplier = 10 - - map_function = lambda x: x * map_multiplier - batch_size = 1 - pipeline, iterator = build_data_pipeline(input_array, map_function, - batch_size) - - # some constants - c1 = 5.0 - c2 = 10.0 - model, pl = 
build_simple_model(pipeline, c1, c2) - - with tf.Session() as sess: - # Initialize the globals and the dataset - sess.run(iterator.initializer) - - for i in range(1, 10): - # Expected value is: - # Change it to run on TF if the model gets too complex - expected_output_array[i - 1] = ( - (input_array[i - 1] * map_multiplier) * c1) + c2 + i - - # Run one iteration - output = sess.run(model, feed_dict={pl: i}) - output_array[i - 1] = output[0] - return input_array, output_array, expected_output_array - - - -def run_axpy_pipeline_extended2(): - input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] - expected_output_array = [-1, -1, 1, -1, -1, -1, -1, -1, -1] - output_array = [0, 0, 0, 0, 0, 0, 0, 0, 0] - map_multiplier = 10 - - map_function = lambda x: x * map_multiplier - batch_size = 1 - pipeline, iterator = build_data_pipeline(input_array, map_function, - batch_size) - - # some constants - c1 = 5.0 - c2 = 10.0 - model, pl1, pl2 = build_simple_model2(pipeline, c1, c2) - - with tf.Session() as sess: - # Initialize the globals and the dataset - sess.run(iterator.initializer) - - for i in range(1, 10): - # Expected value is: - # Change it to run on TF if the model gets too complex - expected_output_array[i - 1] = ( - (input_array[i - 1] * map_multiplier) * (i+4)) + c1 -i + c2 - - # Run one iteration - output = sess.run(model, feed_dict={pl1: i, pl2: (i+4)}) - output_array[i - 1] = output[0] - return input_array, output_array, expected_output_array - - -def main(_): - input_array, output_array, expected_output_array = run_axpy_pipeline_extended() - for i in range(1, 10): - print("Iteration:", i, " Input: ", input_array[i - 1], " Output: ", - output_array[i - 1], " Expected: ", expected_output_array[i - 1]) - sys.stdout.flush() - - -if __name__ == '__main__': - #os.environ['NGRAPH_TF_USE_PREFETCH'] = "1" - tf.app.run(main=main) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 631531e7c..fd2b52bb4 100644 --- 
a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -114,7 +114,7 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) //--------------------------------------------------------------------------- void NGraphEncapsulateOp::CreateParallelExecutor(OpKernelConstruction* ctx, const string& backend_name) { - NGRAPH_VLOG(1)<<"Create Parallel Executor "< encap_subgraph(new Graph(OpRegistry::Global())); @@ -186,7 +186,7 @@ void NGraphEncapsulateOp::CreateParallelExecutor(OpKernelConstruction* ctx, //--------------------------------------------------------------------------- void NGraphEncapsulateOp::CreateLegacyExecutor(OpKernelConstruction* ctx, const string& backend_name) { - NGRAPH_VLOG(1)<<"Create Legacy Executor "< tf_input_tensors; @@ -596,14 +596,14 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { event_return_tensor.Stop(); ngraph::Event::write_trace(event_return_tensor); - NGRAPH_VLOG(2) << "COMPUTE: Done "<< name(); + NGRAPH_VLOG(2) << "COMPUTE: Done " << name(); } //--------------------------------------------------------------------------- // ComputeUsingLegacyExecutor //--------------------------------------------------------------------------- void NGraphEncapsulateOp::ComputeUsingLegacyExecutor(OpKernelContext* ctx) { - NGRAPH_VLOG(1)<<"Compute using Legacy Executor "<resource_manager()->Lookup( @@ -108,7 +108,6 @@ Status GetPipelinedIOTensorsReadyForExecution( NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: Creating the shared object to " "signal prefetching"; } else { - int prefetch_buffer_depth = shared_data->GetBufferDepth(); int skip_count = shared_data->GetSkipCount(); NGRAPH_VLOG(2) << "[PREFETCH] COMPUTE: DEPTH: " << prefetch_buffer_depth diff --git a/test/python/test_prefetched.py b/test/python/test_prefetched.py index 3eab28c4f..deb5008bd 100644 --- a/test/python/test_prefetched.py +++ b/test/python/test_prefetched.py @@ -15,8 +15,10 @@ import ngraph_bridge + class 
TestPrefetched(NgraphTest): - def build_data_pipeline(self,input_array, map_function, batch_size): + + def build_data_pipeline(self, input_array, map_function, batch_size): dataset = (tf.data.Dataset.from_tensor_slices( (tf.constant(input_array) )).map(map_function).batch(batch_size).prefetch(1)) @@ -25,8 +27,7 @@ def build_data_pipeline(self,input_array, map_function, batch_size): data_to_be_prefetched_and_used = iterator.get_next() return data_to_be_prefetched_and_used, iterator - - def build_model1(self,input_array, c1, c2): + def build_model1(self, input_array, c1, c2): # Convert the numpy array to TF Tensor input_f = tf.cast(input_array, tf.float32) @@ -41,8 +42,8 @@ def build_model1(self,input_array, c1, c2): add2 = add + pl1_f output = add2 - c2 return output, pl1, pl2 - - def build_model2(self,input_array, c1, c2): + + def build_model2(self, input_array, c1, c2): # Convert the numpy array to TF Tensor input_f = tf.cast(input_array, tf.float32) @@ -51,41 +52,39 @@ def build_model2(self,input_array, c1, c2): pl1_f = tf.cast(pl1, tf.float32) pl2 = tf.placeholder(dtype=dtypes.int32) pl2_f = tf.cast(pl2, tf.float32) - + mul = tf.compat.v1.math.multiply(pl2_f, input_f) add = tf.compat.v1.math.add(mul, c2) add2 = add + pl1_f * c1 output = add2 return output, pl1, pl2 - - def __run_test(self, pipeline_creator, model): + def __run_test(self, pipeline_creator, model): # build model input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] map_multiplier = 10 map_function = lambda x: x * map_multiplier batch_size = 1 pipeline, iterator = pipeline_creator(input_array, map_function, - batch_size) + batch_size) # some constants c1 = 5.0 c2 = 10.0 model, pl1, pl2 = model(pipeline, c1, c2) - outputs=[] - + outputs = [] + sess = tf.Session() # Initialize the globals and the dataset sess.run(iterator.initializer) for i in range(1, 10): - output = sess.run(model, feed_dict={pl1: i, pl2: i+3}) + output = sess.run(model, feed_dict={pl1: i, pl2: i + 3}) outputs.append(output) - - return outputs 
+ return outputs # test hangs when "NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS" is set def test_prefetch1(self): @@ -93,47 +92,50 @@ def test_prefetch1(self): prefetch_env = "NGRAPH_TF_USE_PREFETCH" env_var_map = self.store_env_variables([prefetch_env]) self.set_env_variable(prefetch_env, "1") - + # Run on nGraph - ng_outputs = self.__run_test(self.build_data_pipeline,self.build_model1) + ng_outputs = self.__run_test(self.build_data_pipeline, + self.build_model1) # Reset Graph tf.reset_default_graph() # Run on TF - disable_tf="NGRAPH_TF_DISABLE" + disable_tf = "NGRAPH_TF_DISABLE" self.set_env_variable(disable_tf, "1") - tf_outputs = self.__run_test(self.build_data_pipeline,self.build_model1) + tf_outputs = self.__run_test(self.build_data_pipeline, + self.build_model1) # Compare Values assert np.allclose(ng_outputs, tf_outputs) - + # unset env variable self.unset_env_variable(prefetch_env) self.unset_env_variable(disable_tf) self.restore_env_variables(env_var_map) - def test_prefetch2(self): # set flags prefetch_env = "NGRAPH_TF_USE_PREFETCH" env_var_map = self.store_env_variables([prefetch_env]) self.set_env_variable(prefetch_env, "1") - + # Run on nGraph - ng_outputs = self.__run_test(self.build_data_pipeline, self.build_model2) + ng_outputs = self.__run_test(self.build_data_pipeline, + self.build_model2) # Reset Graph tf.reset_default_graph() # Run on TF - disable_tf="NGRAPH_TF_DISABLE" + disable_tf = "NGRAPH_TF_DISABLE" self.set_env_variable(disable_tf, "1") - tf_outputs = self.__run_test(self.build_data_pipeline, self.build_model2) + tf_outputs = self.__run_test(self.build_data_pipeline, + self.build_model2) # Compare Values assert np.allclose(ng_outputs, tf_outputs) - + # unset env variable self.unset_env_variable(prefetch_env) self.unset_env_variable(disable_tf) From 120621cf45574473315e4a7e6e37f125b1602ea8 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 11 Dec 2019 20:09:55 -0800 Subject: [PATCH 27/67] minor --- examples/CMakeLists.txt | 6 ------ 
test/python/test_axpy_pipelined.py | 3 +-- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index b7c73cdc1..0666fee83 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -27,9 +27,3 @@ execute_process( ${CMAKE_CURRENT_SOURCE_DIR}/axpy_pipelined.py ${CMAKE_CURRENT_BINARY_DIR}/axpy_pipelined.py ) - -execute_process( - COMMAND ${CMAKE_COMMAND} -E create_symlink - ${CMAKE_CURRENT_SOURCE_DIR}/axpy_pipelined_extended.py - ${CMAKE_CURRENT_BINARY_DIR}/axpy_pipelined_extended.py -) \ No newline at end of file diff --git a/test/python/test_axpy_pipelined.py b/test/python/test_axpy_pipelined.py index 6046fc94e..42163ecec 100644 --- a/test/python/test_axpy_pipelined.py +++ b/test/python/test_axpy_pipelined.py @@ -16,7 +16,6 @@ sys.path.insert(0, '../../examples') from axpy_pipelined import * -from axpy_pipelined_extended import * class TestAxpyPipelined(NgraphTest): @@ -35,4 +34,4 @@ def test_axpy_pipelined(self): output_array[i - 1], expected_output_array[i - 1], atol=1e-3), "Output and expected output values don't match" self.unset_env_variable(prefetch_env) - self.restore_env_variables(env_var_map) \ No newline at end of file + self.restore_env_variables(env_var_map) From 7fc95071dbad4682c72fc3821730139d5dc1c1ef Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 12 Dec 2019 12:14:43 -0800 Subject: [PATCH 28/67] fixed test --- .../ngraph_find_replace_prefetchdataset.h | 3 ++- ngraph_bridge/ngraph_tensor_manager.cc | 2 +- test/test_ngraph_tensor_manager.cpp | 18 +++++++++++++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/ngraph_bridge/ngraph_find_replace_prefetchdataset.h b/ngraph_bridge/ngraph_find_replace_prefetchdataset.h index 7eef658c4..360517665 100644 --- a/ngraph_bridge/ngraph_find_replace_prefetchdataset.h +++ b/ngraph_bridge/ngraph_find_replace_prefetchdataset.h @@ -129,9 +129,10 @@ Status ReplacePrefetch(Graph* graph, Node* prefetch_node) { } // Finally 
remove the current preftetch node - graph->RemoveNode(prefetch_node); NGRAPH_VLOG(4) << "Replaced TF Prefetch Node " << prefetch_node->name() << " with NG Prefetch Node " << replacement->name(); + graph->RemoveNode(prefetch_node); + return Status::OK(); } diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 7255a0081..a659cfa65 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -95,7 +95,7 @@ void NGraphTensorManager::Initialize() { // complements m_pipelined_input_indexes_not_prefetched = FindComplement( - m_pipelined_input_indexes, m_pipelined_input_indexes_prefetched); + m_pipelined_input_indexes.size(), m_pipelined_input_indexes_prefetched); m_pipelined_not_prefetched_input_indexes = FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes); } diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index 0efba0163..7eb435bdd 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -107,15 +107,20 @@ TEST_F(NGraphTensorManagerTest, NoVariablesNoPrefetch) { int number_of_inputs = 5; int number_of_outputs = 2; - NGraphTensorManager tensor_manager(ng_encap_node_name, ng_encap_cluster_id, - ng_encap_graph_id, number_of_inputs, - number_of_outputs); // expected vector empty; vector expected_pipelined_inp_indexes = FillRange(number_of_inputs); vector expected_pipelined_out_indexes = FillRange(number_of_outputs); vector expected_out_indexes_need_copy = FillRange(number_of_outputs); + if (ngraph_tf_are_variables_enabled()) { + EnterVarInCatalog(ng_encap_graph_id, ng_encap_node_name, empty, empty, + expected_out_indexes_need_copy); + } + NGraphTensorManager tensor_manager(ng_encap_node_name, ng_encap_cluster_id, + ng_encap_graph_id, number_of_inputs, + number_of_outputs); + // var related ASSERT_EQ(empty, tensor_manager.GetInputIndexesFedByVariables()); ASSERT_EQ(empty, 
tensor_manager.GetOutputIndexesAssigningVariables()); @@ -136,6 +141,8 @@ TEST_F(NGraphTensorManagerTest, NoVariablesNoPrefetch) { ASSERT_EQ(empty, tensor_manager.GetPipelinedInputIndexesThatArePrefetched()); ASSERT_EQ(expected_pipelined_inp_indexes, tensor_manager.GetPipelinedInputIndexesThatAreNotPrefetched()); + // clean up + ClearCatalog(); } // Tests scenario when the graph has variables but no prefetched inputs @@ -264,6 +271,11 @@ TEST_F(NGraphTensorManagerTest, NoVariablesHasPrefetch) { vector expected_pipelined_input_indexes_not_prefetched = expected_pipelined_not_prefetched_input_indexes; + if (ngraph_tf_are_variables_enabled()) { + EnterVarInCatalog(ng_encap_graph_id, ng_encap_node_name, empty, empty, + expected_out_indexes_need_copy); + } + EnterPrefetchInCatalog(ng_encap_graph_id, ng_encap_node_name, expected_prefetched_inp_indexes); From 6d4c03705bf6d3e36fe5c04da9873a5acdc80ee4 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 12 Dec 2019 13:42:16 -0800 Subject: [PATCH 29/67] FindComplement modified --- ngraph_bridge/ngraph_utils.cc | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/ngraph_bridge/ngraph_utils.cc b/ngraph_bridge/ngraph_utils.cc index 240ed82c6..e3cad7be8 100644 --- a/ngraph_bridge/ngraph_utils.cc +++ b/ngraph_bridge/ngraph_utils.cc @@ -48,13 +48,7 @@ vector FindComplement(const int& max_element, vector superset(max_element); iota(begin(superset), end(superset), 0); - // max size of complement is superset - vector complement(superset.size()); - vector::iterator it = set_difference( - superset.begin(), superset.begin() + superset.size(), element_set.begin(), - element_set.begin() + element_set.size(), complement.begin()); - complement.resize(it - complement.begin()); - return complement; + return FindComplement(superset, element_set); } // Finds the complement of element_set From 31620bb1c00393560d5e8328aab6b017dfe06945 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 12 Dec 2019 18:46:20 -0800 
Subject: [PATCH 30/67] Apply suggestions from code review Co-Authored-By: kanvi-nervana --- ngraph_bridge/ngraph_prefetch_shared_data.h | 2 +- test/test_enter_prefetch_in_catalog.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_prefetch_shared_data.h b/ngraph_bridge/ngraph_prefetch_shared_data.h index 8ce683efd..f92bfb6a0 100644 --- a/ngraph_bridge/ngraph_prefetch_shared_data.h +++ b/ngraph_bridge/ngraph_prefetch_shared_data.h @@ -90,7 +90,7 @@ class NGraphPrefetchSharedResouce : public ResourceBase { m_ng_2_tf.Add(std::move(next)); } - // Returns the Input output tensors to be ready to be executed by NG device + // Returns the Input output tensors ready to be executed by NG device // This will be called by the NGEncOp IOTensorBundle GetNextIOTensorBundleReadyForDeviceExecution() { return std::move(m_ng_2_tf.GetNextAvailable()); diff --git a/test/test_enter_prefetch_in_catalog.cc b/test/test_enter_prefetch_in_catalog.cc index 2d23af0c3..08cb4a6f5 100644 --- a/test/test_enter_prefetch_in_catalog.cc +++ b/test/test_enter_prefetch_in_catalog.cc @@ -112,11 +112,11 @@ TEST(PrefetchCatalogTest, SmallGraph2) { // Clean up NGraphCatalog::ClearCatalog(); - // Unset, Restore env flga + // Unset, restore env flags UnsetEnvVariable("NGRAPH_TF_USE_PREFETCH"); RestoreEnv(env_map); } } // namespace testing } // namespace ngraph_bridge -} // namespace tensorflow \ No newline at end of file +} // namespace tensorflow From 19154934514964a87090a4c4092883e0458fa80e Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 12 Dec 2019 18:47:22 -0800 Subject: [PATCH 31/67] incorporate review comemnts --- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 3 +-- ngraph_bridge/ngraph_prefetch_shared_data.h | 6 +----- test/python/test_axpy_pipelined.py | 2 +- test/python/test_prefetched.py | 18 ++++++++++++++++++ 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc 
b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index 236e15923..7f3da4a32 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -71,8 +71,7 @@ Status GetPipelinedIOTensorsReadyForExecution( // 1. Create the shared data object // 2. We get another pipelined tensor pair for the current iteration and // add it to the shared data. It will be accessed by prefetcher to copy - // the - // prefetched inputs to device + // the prefetched inputs to device auto ng_prefetch_input_indexes = tensor_manager->GetPipelinedInputIndexesThatArePrefetched(); diff --git a/ngraph_bridge/ngraph_prefetch_shared_data.h b/ngraph_bridge/ngraph_prefetch_shared_data.h index 8ce683efd..cb736798c 100644 --- a/ngraph_bridge/ngraph_prefetch_shared_data.h +++ b/ngraph_bridge/ngraph_prefetch_shared_data.h @@ -39,11 +39,9 @@ namespace ngraph_bridge { class NGraphPrefetchSharedResouce : public ResourceBase { public: explicit NGraphPrefetchSharedResouce(const std::string& ng_enc_op_name, - // const std::string& backend_name, int cluster_id, int graph_id, const vector prefetch_input_indexes) : m_ng_enc_op_name(ng_enc_op_name), - // m_backend_name(backend_name), m_graph_id(graph_id), m_cluster_id(cluster_id), m_prefetch_input_indexes(prefetch_input_indexes) {} @@ -54,7 +52,6 @@ class NGraphPrefetchSharedResouce : public ResourceBase { // Returns memory used by this resource. 
int64 MemoryUsed() const override { return 0; } std::string GetName() const { return m_ng_enc_op_name; } - // std::string GetBackendName() const { return m_backend_name; } int GetGraphId() const { return m_graph_id; } int GetClusterId() const { return m_cluster_id; } @@ -90,7 +87,7 @@ class NGraphPrefetchSharedResouce : public ResourceBase { m_ng_2_tf.Add(std::move(next)); } - // Returns the Input output tensors to be ready to be executed by NG device + // Returns the Input output tensors ready to be executed by NG device // This will be called by the NGEncOp IOTensorBundle GetNextIOTensorBundleReadyForDeviceExecution() { return std::move(m_ng_2_tf.GetNextAvailable()); @@ -126,7 +123,6 @@ class NGraphPrefetchSharedResouce : public ResourceBase { private: const std::string m_ng_enc_op_name; - // const std::string m_backend_name; const int m_graph_id; const int m_cluster_id; const vector m_prefetch_input_indexes; diff --git a/test/python/test_axpy_pipelined.py b/test/python/test_axpy_pipelined.py index 42163ecec..c3607d973 100644 --- a/test/python/test_axpy_pipelined.py +++ b/test/python/test_axpy_pipelined.py @@ -13,7 +13,7 @@ # For eg. when running the test from ngraph-bridge/build_cmake/test/python # you can add this path as below -sys.path.insert(0, '../../examples') +# sys.path.insert(0, '../../examples') from axpy_pipelined import * diff --git a/test/python/test_prefetched.py b/test/python/test_prefetched.py index deb5008bd..b964bb63e 100644 --- a/test/python/test_prefetched.py +++ b/test/python/test_prefetched.py @@ -1,3 +1,21 @@ +# ============================================================================== +# Copyright 2018-2019 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""nGraph TensorFlow bridge prefetch test + +""" import sys import pytest import getpass From a4f9e1fb0500cb9715634003c6798554fa75c7e6 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Fri, 13 Dec 2019 14:20:20 -0800 Subject: [PATCH 32/67] addressed review comments --- ngraph_bridge/ngraph_encapsulate_op.cc | 3 ++- ngraph_bridge/ngraph_encapsulate_op_utils.h | 15 +++++++++++++++ ngraph_bridge/ngraph_prefetch_shared_data.h | 10 ++++------ test/test_ngraph_tensor_manager.cpp | 2 +- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index fd2b52bb4..9a48d8c92 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -499,7 +499,8 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { vector> ng_inputs(num_of_inputs); vector> ng_outputs(num_of_outputs); - // Assume All inputs and outputs are pipelined + // All inputs and outputs are pipelined. 
+ // Of all these pipelined inputs some are prefetched // TODO: Fit in variables ng_inputs = get<1>(pipelined_io_tensors); ng_outputs = get<2>(pipelined_io_tensors); diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.h b/ngraph_bridge/ngraph_encapsulate_op_utils.h index 33d1476d2..7f48eb09c 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.h +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.h @@ -30,6 +30,21 @@ namespace tensorflow { namespace ngraph_bridge { +// This function does the following +// 1. Gets pipelined tensors for current execution from pipelined tensor store +// (PTS) +// 2. If prefetch is enabled +// a. if prefetch shared resource is not created +// creates it +// gets next set of tensors from PTS and adds it to the shared +// object for prefetching +// b. else +// gets the tensors from prefetch object and adds the tensors from +// step 1 to the prefetch object +// 3. Copies the tf input tensors that are not prefetched to the ngraph +// pipelined input tensors +// + Status GetPipelinedIOTensorsReadyForExecution( OpKernelContext* ctx, vector& tf_input_tensors, shared_ptr& pipelined_tensor_store, diff --git a/ngraph_bridge/ngraph_prefetch_shared_data.h b/ngraph_bridge/ngraph_prefetch_shared_data.h index cb736798c..d42abd7cf 100644 --- a/ngraph_bridge/ngraph_prefetch_shared_data.h +++ b/ngraph_bridge/ngraph_prefetch_shared_data.h @@ -38,9 +38,9 @@ namespace ngraph_bridge { class NGraphPrefetchSharedResouce : public ResourceBase { public: - explicit NGraphPrefetchSharedResouce(const std::string& ng_enc_op_name, - int cluster_id, int graph_id, - const vector prefetch_input_indexes) + explicit NGraphPrefetchSharedResouce( + const std::string& ng_enc_op_name, int cluster_id, int graph_id, + const vector& prefetch_input_indexes) : m_ng_enc_op_name(ng_enc_op_name), m_graph_id(graph_id), m_cluster_id(cluster_id), @@ -140,9 +140,7 @@ class NGraphPrefetchSharedResouce : public ResourceBase { // 1 NGEncOp pushes the Input/Output tensors to m_ng_2_tf 
queue // 2 // Prefetcher pulls Input/Output tensors out of m_ng_2_tf queue and - // copies - // TF - // data to the prefetched inputs + // and copies TF data to the prefetched inputs // Prefetcher pushes this item to the m_tf_2_ng queue // NGEncOp pushes the Input/Output tensors to m_ng_2_tf queue // NGEncOp pulls Input/Output tensors from m_tf_2_ng (from previous diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index 7eb435bdd..d92779efc 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -126,7 +126,7 @@ TEST_F(NGraphTensorManagerTest, NoVariablesNoPrefetch) { ASSERT_EQ(empty, tensor_manager.GetOutputIndexesAssigningVariables()); ASSERT_EQ(expected_out_indexes_need_copy, tensor_manager.GetOutputIndexesThatNeedCopy()); - // piplined + // pipelined ASSERT_EQ(expected_pipelined_inp_indexes, tensor_manager.GetPipelinedInputIndexes()); ASSERT_EQ(expected_pipelined_out_indexes, From 85368442e479d230061f3b21d3f6fd85a58ed6a2 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Fri, 13 Dec 2019 14:47:14 -0800 Subject: [PATCH 33/67] remove test-prefetch-2 --- test/python/test_prefetched.py | 43 ---------------------------------- 1 file changed, 43 deletions(-) diff --git a/test/python/test_prefetched.py b/test/python/test_prefetched.py index b964bb63e..a501474cd 100644 --- a/test/python/test_prefetched.py +++ b/test/python/test_prefetched.py @@ -61,22 +61,6 @@ def build_model1(self, input_array, c1, c2): output = add2 - c2 return output, pl1, pl2 - def build_model2(self, input_array, c1, c2): - # Convert the numpy array to TF Tensor - input_f = tf.cast(input_array, tf.float32) - - # Define the Ops - pl1 = tf.placeholder(dtype=dtypes.int32) - pl1_f = tf.cast(pl1, tf.float32) - pl2 = tf.placeholder(dtype=dtypes.int32) - pl2_f = tf.cast(pl2, tf.float32) - - mul = tf.compat.v1.math.multiply(pl2_f, input_f) - add = tf.compat.v1.math.add(mul, c2) - add2 = add + pl1_f * c1 - output = add2 - return 
output, pl1, pl2 - def __run_test(self, pipeline_creator, model): # build model input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] @@ -131,30 +115,3 @@ def test_prefetch1(self): self.unset_env_variable(prefetch_env) self.unset_env_variable(disable_tf) self.restore_env_variables(env_var_map) - - def test_prefetch2(self): - # set flags - prefetch_env = "NGRAPH_TF_USE_PREFETCH" - env_var_map = self.store_env_variables([prefetch_env]) - self.set_env_variable(prefetch_env, "1") - - # Run on nGraph - ng_outputs = self.__run_test(self.build_data_pipeline, - self.build_model2) - - # Reset Graph - tf.reset_default_graph() - - # Run on TF - disable_tf = "NGRAPH_TF_DISABLE" - self.set_env_variable(disable_tf, "1") - tf_outputs = self.__run_test(self.build_data_pipeline, - self.build_model2) - - # Compare Values - assert np.allclose(ng_outputs, tf_outputs) - - # unset env variable - self.unset_env_variable(prefetch_env) - self.unset_env_variable(disable_tf) - self.restore_env_variables(env_var_map) From bf8e91892764f89589740533f02138791b59aaf4 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Fri, 13 Dec 2019 15:35:30 -0800 Subject: [PATCH 34/67] renamed the vars for indexes relative to pipelined indexes --- ngraph_bridge/ngraph_tensor_manager.cc | 7 ++++--- ngraph_bridge/ngraph_tensor_manager.h | 8 ++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index a659cfa65..422b217ed 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -89,13 +89,14 @@ void NGraphTensorManager::Initialize() { to_string(pref_index) + " not found in pipelined inputs."); } - m_pipelined_input_indexes_prefetched.push_back( + m_pipelined_input_indexes_that_are_prefetched.push_back( position - m_pipelined_input_indexes.begin()); } // complements - m_pipelined_input_indexes_not_prefetched = FindComplement( - m_pipelined_input_indexes.size(), 
m_pipelined_input_indexes_prefetched); + m_pipelined_input_indexes_that_are_not_prefetched = + FindComplement(m_pipelined_input_indexes.size(), + m_pipelined_input_indexes_that_are_prefetched); m_pipelined_not_prefetched_input_indexes = FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes); } diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index 2fe2aa0a7..47f118f86 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -84,12 +84,12 @@ class NGraphTensorManager { // wrt to pipelined inputs const vector& GetPipelinedInputIndexesThatArePrefetched() { - return m_pipelined_input_indexes_prefetched; + return m_pipelined_input_indexes_that_are_prefetched; } // wrt to pipelined inputs const vector& GetPipelinedInputIndexesThatAreNotPrefetched() { - return m_pipelined_input_indexes_not_prefetched; + return m_pipelined_input_indexes_that_are_not_prefetched; } private: @@ -112,8 +112,8 @@ class NGraphTensorManager { vector m_pipelined_input_indexes; vector m_pipelined_output_indexes; // indexes wrt pipelined inputs - vector m_pipelined_input_indexes_prefetched; - vector m_pipelined_input_indexes_not_prefetched; + vector m_pipelined_input_indexes_that_are_prefetched; + vector m_pipelined_input_indexes_that_are_not_prefetched; // indexes wrt all inputs vector m_prefetched_input_indexes; From b2300b76f0556561446a933da5b10c3c3d293fa5 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Fri, 13 Dec 2019 15:53:46 -0800 Subject: [PATCH 35/67] examples --- examples/axpy_pipelined.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/axpy_pipelined.py b/examples/axpy_pipelined.py index 8bf3dc2ef..00a9aec1d 100644 --- a/examples/axpy_pipelined.py +++ b/examples/axpy_pipelined.py @@ -86,6 +86,5 @@ def main(_): if __name__ == '__main__': - os.environ['NGRAPH_TF_BACKEND'] = "INTERPRETER" #os.environ['NGRAPH_TF_USE_PREFETCH'] = "1" tf.app.run(main=main) From 
5222150b78a0b5d8fe06dbd868679e6941b95379 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Fri, 13 Dec 2019 17:36:23 -0800 Subject: [PATCH 36/67] fixed hang seen when disable deassign --- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 7 ++++++- test/python/test_prefetched.py | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index 7f3da4a32..2bb71db76 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -57,8 +57,13 @@ Status GetPipelinedIOTensorsReadyForExecution( } bool skip_tf2ng_copy = false; + // Prefetch only if there are input tensors that are prefetched && prefetch + // has been requested + // [TODO] we support prefetching only when there is atmost 1 encap + // that has prefetched inputs if (std::getenv(NGraphPrefetchSharedResouce::NGRAPH_TF_USE_PREFETCH) != - nullptr) { + nullptr && + !(tensor_manager->GetPipelinedInputIndexesThatArePrefetched()).empty()) { NGRAPH_VLOG(2) << "[PREFETCH] NGRAPH_TF_USE_PREFETCH Set"; // Set the prefetch shared obj if applicable NGraphPrefetchSharedResouce* shared_data = nullptr; diff --git a/test/python/test_prefetched.py b/test/python/test_prefetched.py index a501474cd..ea3357436 100644 --- a/test/python/test_prefetched.py +++ b/test/python/test_prefetched.py @@ -88,7 +88,6 @@ def __run_test(self, pipeline_creator, model): return outputs - # test hangs when "NGRAPH_TF_DISABLE_DEASSIGN_CLUSTERS" is set def test_prefetch1(self): # set flags prefetch_env = "NGRAPH_TF_USE_PREFETCH" From f6baec4df46aae451b529001350967da082e29c0 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Sat, 14 Dec 2019 16:42:09 -0800 Subject: [PATCH 37/67] Extended TM to store variable shared name --- ngraph_bridge/ngraph_catalog.cc | 10 +++- ngraph_bridge/ngraph_catalog.h | 4 ++ ngraph_bridge/ngraph_tensor_manager.cc | 75 ++++++++++++++++++++++++++ 
ngraph_bridge/ngraph_tensor_manager.h | 17 ++++++ 4 files changed, 104 insertions(+), 2 deletions(-) diff --git a/ngraph_bridge/ngraph_catalog.cc b/ngraph_bridge/ngraph_catalog.cc index 95b65f506..169478467 100644 --- a/ngraph_bridge/ngraph_catalog.cc +++ b/ngraph_bridge/ngraph_catalog.cc @@ -171,12 +171,18 @@ bool NGraphCatalog::ExistsInEncapOutputInfoMap(const string& key) { bool NGraphCatalog::ExistsInEncapOutputInfoMap(const int& graphid, const string& node_name, - const int& input_index) { - string key = NGraphCatalog::CreateNodeKey(graphid, node_name, input_index); + const int& output_index) { + string key = NGraphCatalog::CreateNodeKey(graphid, node_name, output_index); auto itr = NGraphCatalog::encap_output_info_map_.find(key); return itr != NGraphCatalog::encap_output_info_map_.end(); } +const tuple& NGraphCatalog::GetInfoFromEncapOutputInfoMap( + const int& graphid, const string& node_name, const int& output_index) { + string key = NGraphCatalog::CreateNodeKey(graphid, node_name, output_index); + return NGraphCatalog::GetInfoFromEncapOutputInfoMap(key); +} + const tuple& NGraphCatalog::GetInfoFromEncapOutputInfoMap( const string& key) { return NGraphCatalog::encap_output_info_map_.at(key); diff --git a/ngraph_bridge/ngraph_catalog.h b/ngraph_bridge/ngraph_catalog.h index 85c4c45a2..c27b3bccf 100644 --- a/ngraph_bridge/ngraph_catalog.h +++ b/ngraph_bridge/ngraph_catalog.h @@ -138,6 +138,10 @@ class NGraphCatalog { const int& output_index); static const tuple& GetInfoFromEncapOutputInfoMap( const string& key); + + static const tuple& GetInfoFromEncapOutputInfoMap( + const int& graphid, const string& node_name, const int& output_index); + static const string& GetVariableSharedNameFromEncapOutputInfoMap( const string& key); static const bool& GetCopyToTFFromEncapOutputInfoMap(const string& key); diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 422b217ed..e21a8663f 100644 --- 
a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -42,16 +42,44 @@ NGraphTensorManager::NGraphTensorManager(const string ng_encap_node_name, void NGraphTensorManager::Initialize() { #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) + + // input variables book-keeping for (int index = 0; index < m_number_of_inputs; index++) { if (NGraphCatalog::ExistsInInputVariableSharedNameMap( m_ng_encap_graph_id, m_ng_encap_node_name, index)) { m_input_indexes_from_variables.push_back(index); + // store the variable shared name + try { + auto shared_name = NGraphCatalog::GetInputVariableSharedName( + m_ng_encap_graph_id, m_ng_encap_node_name, index); + input_variable_shared_name_map.insert({index, shared_name}); + } catch { + throw runtime_error( + "Could not find variable shared name in catalog for input index " + + to_string(index) + "for encapsulate op " + m_ng_encap_node_name); + } } } + + // output variables book-keeping + // these weights are updated in place for (int index = 0; index < m_number_of_outputs; index++) { if (NGraphCatalog::ExistsInEncapOutputInfoMap( m_ng_encap_graph_id, m_ng_encap_node_name, index)) { m_output_indexes_assigning_variable.push_back(index); + + // store the output variable shared name + copy_to_tf info + try { + auto shared_name_copy_to_tf = + NGraphCatalog::GetInfoFromEncapOutputInfoMap( + m_ng_encap_graph_id, m_ng_encap_node_name, index); + output_variable_info_map.insert({index, shared_name_copy_to_tf}); + } catch { + throw runtime_error( + "Could not find variable shared name and copy_to_tf information in " + "catalog for output index " + + to_string(index) + " for encapsulate op " + m_ng_encap_node_name); + } } if (NGraphCatalog::EncapOutputIndexNeedsCopy(m_ng_encap_graph_id, m_ng_encap_node_name, index)) { @@ -106,5 +134,52 @@ void NGraphTensorManager::Initialize() { //--------------------------------------------------------------------------- NGraphTensorManager::~NGraphTensorManager() {} 
+//--------------------------------------------------------------------------- +// NGraphTensorManager::GetInputVariableSharedName +//--------------------------------------------------------------------------- +Status NGraphTensorManager::GetInputVariableSharedName( + const int& input_index, string* input_var_shared_name) { + auto itr = input_variable_shared_name_map.find(input_index); + if (itr == input_variable_shared_name_map.end()) { + return errors::Internal( + "Could not find shared name for input index in tensor manager ", + input_index); + } + *input_var_shared_name = itr->second; + return Status::OK(); +} + +//--------------------------------------------------------------------------- +// NGraphTensorManager::GetOutputVariableSharedName +//--------------------------------------------------------------------------- +Status NGraphTensorManager::GetOutputVariableSharedName( + const int& output_index, string* output_var_shared_name) { + auto itr = output_variable_info_map.find(output_index); + if (itr == output_variable_info_map.end()) { + return errors::Internal( + "Could not find shared name and copy_to_tf info for output index in " + "tensor manager ", + output_index); + } + *output_var_shared_name = get<0>(itr->second); + return Status::OK(); +} + +//--------------------------------------------------------------------------- +// NGraphTensorManager::GetOutputVariableCopyToTF +//--------------------------------------------------------------------------- +Status NGraphTensorManager::GetOutputVariableCopyToTF( + const int& output_index, bool* output_var_copy_to_tf) { + auto itr = output_variable_info_map.find(output_index); + if (itr == output_variable_info_map.end()) { + return errors::Internal( + "Could not find shared name and copy_to_tf info for output index in " + "tensor manager ", + output_index); + } + *output_var_copy_to_tf = get<1>(itr->second); + return Status::OK(); +} + } // namespace ngraph_bridge } // namespace tensorflow \ No newline at end of 
file diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index 47f118f86..df0fc55ac 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -92,6 +92,18 @@ class NGraphTensorManager { return m_pipelined_input_indexes_that_are_not_prefetched; } + // input variable shared name + Status GetInputVariableSharedName(const int& input_index, + string* input_var_shared_name); + + // output variable shared name + Status GetOutputVariableSharedName(const int& output_index, + string* output_var_shared_name); + + // input variable shared name + Status GetOutputVariableCopyToTF(const int& output_index, + bool* output_var_copy_to_tf); + private: void Initialize(); string m_ng_encap_node_name; @@ -107,6 +119,7 @@ class NGraphTensorManager { vector m_output_indexes_that_need_copy; // All indexes that are not from/to variables + // Book-keeping primarily for data pipelining // These are pipelined, some of these are also prefetched // indexes wrt all inputs/outputs vector m_pipelined_input_indexes; @@ -118,6 +131,10 @@ class NGraphTensorManager { // indexes wrt all inputs vector m_prefetched_input_indexes; vector m_pipelined_not_prefetched_input_indexes; + + // Book-keeping for weights-on-device optimizations + unordered_map input_variable_shared_name_map; + unordered_map> output_variable_info_map; }; } // namespace ngraph_bridge From fae2335f45606613ffcf5c1f9016a4a924e292d4 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Sat, 14 Dec 2019 17:19:29 -0800 Subject: [PATCH 38/67] added test --- ngraph_bridge/ngraph_tensor_manager.h | 6 +- test/test_ngraph_tensor_manager.cpp | 88 +++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index df0fc55ac..d0e394dcd 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -92,15 +92,15 @@ class 
NGraphTensorManager { return m_pipelined_input_indexes_that_are_not_prefetched; } - // input variable shared name + // input ng-variable shared name Status GetInputVariableSharedName(const int& input_index, string* input_var_shared_name); - // output variable shared name + // output ng-variable shared name Status GetOutputVariableSharedName(const int& output_index, string* output_var_shared_name); - // input variable shared name + // does output ng-variable's host-TF tensor needs to be updated Status GetOutputVariableCopyToTF(const int& output_index, bool* output_var_copy_to_tf); diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index d92779efc..81d107ced 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -62,6 +62,7 @@ class NGraphTensorManagerTest : public ::testing::Test { ng_encap_graph_id, ng_encap_node_name, indexes_need_copy); } + // Utility to Simulate entering prefetch info in catalog void EnterPrefetchInCatalog(const int& ng_encap_graph_id, const string& ng_encap_node_name, const vector& prefetched_inp_indexes) { @@ -82,6 +83,24 @@ class NGraphTensorManagerTest : public ::testing::Test { iota(vout.begin(), vout.end(), 0); return vout; } + + // Utility to Simulate entering variable shared name info info in catalog + void EnterVarSharedInfoInCatalog( + const int& ng_encap_graph_id, const string& ng_encap_node_name, + const unordered_map& input_var_info_map, + const unordered_map>& output_var_info_map) { + for (auto itr : input_var_info_map) { + string key = NGraphCatalog::CreateNodeKey(ng_encap_graph_id, + ng_encap_node_name, itr.first); + NGraphCatalog::AddToInputVariableSharedNameMap(key, itr.second); + } + + for (auto itr : output_var_info_map) { + string key = NGraphCatalog::CreateNodeKey(ng_encap_graph_id, + ng_encap_node_name, itr.first); + NGraphCatalog::AddToEncapOutputInfoMap(key, itr.second); + } + } }; TEST(NGraphUtils, FindComplement1) { @@ -427,6 +446,75 @@ 
TEST_F(NGraphTensorManagerTest, PrefetchNotInPipeline) { ClearCatalog(); } +// check book-keeping of shared information +TEST_F(NGraphTensorManagerTest, SharedName) { + string ng_encap_node_name = "xyz_1"; + int ng_encap_cluster_id = 1; + int ng_encap_graph_id = 1; + int number_of_inputs = 5; + int number_of_outputs = 2; + + unordered_map input_var_info_map = {{0, "A"}, {3, "C"}}; + unordered_map> output_var_info_map = { + {1, make_tuple("X", false)}, {5, make_tuple("Y", true)}}; + + EnterVarSharedInfoInCatalog(ng_encap_graph_id, ng_encap_node_name, + input_var_info_map, output_var_info_map); + + NGraphTensorManager tensor_manager(ng_encap_node_name, ng_encap_cluster_id, + ng_encap_graph_id, number_of_inputs, + number_of_outputs); + + if (ngraph_tf_are_variables_enabled()) { + string shared_name; + bool copy_to_tf; + // input var + ASSERT_OK(tensor_manager.GetInputVariableSharedName(0, &shared_name)); + ASSERT_EQ(shared_name, "A"); + ASSERT_OK(tensor_manager.GetInputVariableSharedName(3, &shared_name)); + ASSERT_EQ(shared_name, "C"); + + ASSERT_NOT_OK(tensor_manager.GetInputVariableSharedName(2, &shared_name)); + + // output var + ASSERT_OK(tensor_manager.GetOutputVariableSharedName(1, &shared_name)); + ASSERT_EQ(shared_name, "X"); + ASSERT_OK(tensor_manager.GetOutputVariableSharedName(5, &shared_name)); + ASSERT_EQ(shared_name, "Y"); + + ASSERT_NOT_OK(tensor_manager.GetOutputVariableSharedName(2, &shared_name)); + + // output var copy_to_tf + ASSERT_OK(tensor_manager.GetOutputVariableCopyToTF(1, ©_to_tf)); + ASSERT_FALSE(copy_to_tf); + ASSERT_OK(tensor_manager.GetOutputVariableCopyToTF(5, ©_to_tf)); + ASSERT_TRUE(copy_to_tf); + + ASSERT_NOT_OK(tensor_manager.GetOutputVariableCopyToTF(2, ©_to_tf)); + + } else { + string shared_name; + bool copy_to_tf; + // input var + ASSERT_NOT_OK(tensor_manager.GetInputVariableSharedName(0, &shared_name)); + ASSERT_NOT_OK(tensor_manager.GetInputVariableSharedName(3, &shared_name)); + 
ASSERT_NOT_OK(tensor_manager.GetInputVariableSharedName(2, &shared_name)); + + // output var + ASSERT_NOT_OK(tensor_manager.GetOutputVariableSharedName(1, &shared_name)); + ASSERT_NOT_OK(tensor_manager.GetOutputVariableSharedName(5, &shared_name)); + ASSERT_NOT_OK(tensor_manager.GetOutputVariableSharedName(2, &shared_name)); + + // output var copy_to_tf + ASSERT_NOT_OK(tensor_manager.GetOutputVariableCopyToTF(1, ©_to_tf)); + ASSERT_NOT_OK(tensor_manager.GetOutputVariableCopyToTF(5, ©_to_tf)); + ASSERT_NOT_OK(tensor_manager.GetOutputVariableCopyToTF(2, ©_to_tf)); + } + + // clean up + ClearCatalog(); +} + } // namespace testing } // namespace ngraph_bridge } // namespace tensorflow \ No newline at end of file From b1120c17af381e5b08df00441c2573147108ca70 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Sat, 14 Dec 2019 18:43:52 -0800 Subject: [PATCH 39/67] fix axpy var test --- examples/CMakeLists.txt | 6 ++++++ test/python/test_axpy_var_pipelined.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 0666fee83..c4c224953 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -27,3 +27,9 @@ execute_process( ${CMAKE_CURRENT_SOURCE_DIR}/axpy_pipelined.py ${CMAKE_CURRENT_BINARY_DIR}/axpy_pipelined.py ) + +execute_process( + COMMAND ${CMAKE_COMMAND} -E create_symlink + ${CMAKE_CURRENT_SOURCE_DIR}/axpy_var_pipelined.py + ${CMAKE_CURRENT_BINARY_DIR}/axpy_var_pipelined.py +) \ No newline at end of file diff --git a/test/python/test_axpy_var_pipelined.py b/test/python/test_axpy_var_pipelined.py index 272e34c56..3f220b600 100644 --- a/test/python/test_axpy_var_pipelined.py +++ b/test/python/test_axpy_var_pipelined.py @@ -13,17 +13,17 @@ # For eg. 
when running the test from ngraph-bridge/build_cmake/test/python # you can add this path as below -#sys.path.insert(0, '../../examples') +# sys.path.insert(0, '../../examples') from axpy_var_pipelined import * -class TestAxpyPipelined(NgraphTest): +class TestAxpyVarPipelined(NgraphTest): - def test_axpy_pipelined(self): - #prefetch_env = "NGRAPH_TF_USE_PREFETCH" - #env_var_map = self.store_env_variables([prefetch_env]) - #self.set_env_variable(prefetch_env, "1") + def test_axpy_var_pipelined(self): + prefetch_env = "NGRAPH_TF_USE_PREFETCH" + env_var_map = self.store_env_variables([prefetch_env]) + self.set_env_variable(prefetch_env, "1") input_array, output_array, expected_output_array = run_axpy_pipeline() for i in range(1, 10): print("Iteration:", i, " Input: ", input_array[i - 1], " Output: ", @@ -33,5 +33,5 @@ def test_axpy_pipelined(self): assert np.allclose( output_array[i - 1], expected_output_array[i - 1], atol=1e-3), "Output and expected output values don't match" - #self.unset_env_variable(prefetch_env) - #self.restore_env_variables(env_var_map) + self.unset_env_variable(prefetch_env) + self.restore_env_variables(env_var_map) From 06c93efd90367ba3201f746e7da3856a3018493a Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Sat, 14 Dec 2019 19:10:34 -0800 Subject: [PATCH 40/67] fixed var tests --- ngraph_bridge/ngraph_tensor_manager.cc | 21 ++++++++------------- test/test_ngraph_tensor_manager.cpp | 11 +++++++++-- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index e21a8663f..52a3e1bfe 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -53,7 +53,7 @@ void NGraphTensorManager::Initialize() { auto shared_name = NGraphCatalog::GetInputVariableSharedName( m_ng_encap_graph_id, m_ng_encap_node_name, index); input_variable_shared_name_map.insert({index, shared_name}); - } catch { + } catch (const std::exception& exp) { 
throw runtime_error( "Could not find variable shared name in catalog for input index " + to_string(index) + "for encapsulate op " + m_ng_encap_node_name); @@ -74,7 +74,7 @@ void NGraphTensorManager::Initialize() { NGraphCatalog::GetInfoFromEncapOutputInfoMap( m_ng_encap_graph_id, m_ng_encap_node_name, index); output_variable_info_map.insert({index, shared_name_copy_to_tf}); - } catch { + } catch (const std::exception& exp) { throw runtime_error( "Could not find variable shared name and copy_to_tf information in " "catalog for output index " + @@ -141,9 +141,8 @@ Status NGraphTensorManager::GetInputVariableSharedName( const int& input_index, string* input_var_shared_name) { auto itr = input_variable_shared_name_map.find(input_index); if (itr == input_variable_shared_name_map.end()) { - return errors::Internal( - "Could not find shared name for input index in tensor manager ", - input_index); + return errors::Internal("Could not find shared name info for input index ", + input_index, " in tensor manager "); } *input_var_shared_name = itr->second; return Status::OK(); @@ -156,10 +155,8 @@ Status NGraphTensorManager::GetOutputVariableSharedName( const int& output_index, string* output_var_shared_name) { auto itr = output_variable_info_map.find(output_index); if (itr == output_variable_info_map.end()) { - return errors::Internal( - "Could not find shared name and copy_to_tf info for output index in " - "tensor manager ", - output_index); + return errors::Internal("Could not find shared name info for output index ", + output_index, " in tensor manager"); } *output_var_shared_name = get<0>(itr->second); return Status::OK(); @@ -172,10 +169,8 @@ Status NGraphTensorManager::GetOutputVariableCopyToTF( const int& output_index, bool* output_var_copy_to_tf) { auto itr = output_variable_info_map.find(output_index); if (itr == output_variable_info_map.end()) { - return errors::Internal( - "Could not find shared name and copy_to_tf info for output index in " - "tensor manager ", - 
output_index); + return errors::Internal("Could not find copy_to_tf info for output index ", + output_index, " in tensor manager"); } *output_var_copy_to_tf = get<1>(itr->second); return Status::OK(); diff --git a/test/test_ngraph_tensor_manager.cpp b/test/test_ngraph_tensor_manager.cpp index 81d107ced..42a570ae3 100644 --- a/test/test_ngraph_tensor_manager.cpp +++ b/test/test_ngraph_tensor_manager.cpp @@ -452,11 +452,13 @@ TEST_F(NGraphTensorManagerTest, SharedName) { int ng_encap_cluster_id = 1; int ng_encap_graph_id = 1; int number_of_inputs = 5; - int number_of_outputs = 2; + int number_of_outputs = 6; unordered_map input_var_info_map = {{0, "A"}, {3, "C"}}; unordered_map> output_var_info_map = { - {1, make_tuple("X", false)}, {5, make_tuple("Y", true)}}; + {1, make_tuple("X", false)}, + {5, make_tuple("Y", true)}, + {0, make_tuple("Z", false)}}; EnterVarSharedInfoInCatalog(ng_encap_graph_id, ng_encap_node_name, input_var_info_map, output_var_info_map); @@ -483,6 +485,8 @@ TEST_F(NGraphTensorManagerTest, SharedName) { ASSERT_EQ(shared_name, "Y"); ASSERT_NOT_OK(tensor_manager.GetOutputVariableSharedName(2, &shared_name)); + ASSERT_OK(tensor_manager.GetOutputVariableSharedName(0, &shared_name)); + ASSERT_EQ(shared_name, "Z"); // output var copy_to_tf ASSERT_OK(tensor_manager.GetOutputVariableCopyToTF(1, ©_to_tf)); @@ -492,6 +496,9 @@ TEST_F(NGraphTensorManagerTest, SharedName) { ASSERT_NOT_OK(tensor_manager.GetOutputVariableCopyToTF(2, ©_to_tf)); + ASSERT_OK(tensor_manager.GetOutputVariableCopyToTF(0, ©_to_tf)); + ASSERT_FALSE(copy_to_tf); + } else { string shared_name; bool copy_to_tf; From f89a28e08b6248da6627cde4f16ab8bb52e05140 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Sat, 14 Dec 2019 19:48:09 -0800 Subject: [PATCH 41/67] Fixed axpy pipelined py --- examples/axpy_var_pipelined.py | 41 +++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 18 deletions(-) diff --git a/examples/axpy_var_pipelined.py b/examples/axpy_var_pipelined.py 
index 63cbdafb0..017d860e8 100644 --- a/examples/axpy_var_pipelined.py +++ b/examples/axpy_var_pipelined.py @@ -32,10 +32,12 @@ def build_simple_model(input_array, tensor_var, var_modifier, array_multiplier): # Define the Ops mul = tf.compat.v1.math.multiply(input_array, array_multiplier) - tensor_var_assign = tensor_var.assign(tensor_var + var_modifier) - add = tf.compat.v1.math.add(mul, tensor_var_assign) - output = add - return output + add = tf.compat.v1.math.add(mul, tensor_var) + train_step = tensor_var.assign(add + var_modifier) + + with tf.control_dependencies([train_step]): + train_op = tf.no_op('train_op') + return add, train_op def build_data_pipeline(input_array, map_function, batch_size): @@ -51,21 +53,23 @@ def build_data_pipeline(input_array, map_function, batch_size): def run_axpy_pipeline(): input_array = [1, 2, 3, 4, 5, 6, 7, 8, 9] - expected_output_array = [-1, -1, -1, -1, -1, -1, -1, -1, -1] - output_array = [0, 0, 0, 0, 0, 0, 0, 0, 0] + multiplier = 10 - init = tf.constant(10) - var = tf.get_variable('x', initializer=init) - for i in range(1, 10): - input_array[i - 1] = input_array[i - 1] * i * multiplier map_function = lambda x: x * multiplier batch_size = 1 pipeline, iterator = build_data_pipeline(input_array, map_function, batch_size) + var_init = 10 + init = tf.constant([var_init]) + var = tf.get_variable('x', initializer=init) + var_modifier = 1 array_multiplier = 5 model = build_simple_model(pipeline, var, var_modifier, array_multiplier) - var_sum = 11 + + expected_output_array = [] + output_array = [] + var_val = var_init with tf.Session() as sess: # Initialize the globals and the dataset sess.run(tf.global_variables_initializer()) @@ -73,12 +77,14 @@ def run_axpy_pipeline(): for i in range(1, 10): # Expected value is: - expected_output_array[i - 1] = ( - (input_array[i - 1] * multiplier) * array_multiplier) + var_sum - var_sum = var_sum + var_modifier + expected_output = ( + (input_array[i - 1] * multiplier) * array_multiplier) + 
var_val + expected_output_array.append(expected_output) + var_val = expected_output + var_modifier + # Run one iteration - output = sess.run(model) - output_array[i - 1] = output[0] + output, train_op = sess.run(model) + output_array.append(output[0]) return input_array, output_array, expected_output_array @@ -91,6 +97,5 @@ def main(_): if __name__ == '__main__': - os.environ['NGRAPH_TF_BACKEND'] = "INTERPRETER" - #os.environ['NGRAPH_TF_USE_PREFETCH'] = "1" + os.environ['NGRAPH_TF_USE_PREFETCH'] = "1" tf.app.run(main=main) From 25613b670706801af6c3fb4b89d48ca3c1d0ede0 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 16 Dec 2019 15:07:49 -0800 Subject: [PATCH 42/67] Read only required outputs --- ngraph_bridge/ngraph_encapsulate_op.cc | 40 +++++++++++++++----------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 9a48d8c92..41106b3fe 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -540,12 +540,11 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { ngraph::Event::write_trace(event_execute_graph); // Now prepare the output - ngraph::Event event_copy_output_tensor("Copy Output Tensor", "", ""); - - std::vector> output_copy_events; + // Allocate TF Tensors + vector tf_output_tensors; + ngraph::Event event_allocate_tf_output_tensors("Allocate TF Output Tensor", + "", ""); for (auto i = 0; i < ng_exec->get_results().size(); i++) { - std::unique_ptr event_copy_prep( - new ngraph::Event("Copy Prep", "", "")); auto ng_element = ng_exec->get_results()[i]; auto ng_shape = ng_element->get_shape(); auto ng_element_type = ng_element->get_element_type(); @@ -558,7 +557,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { TensorShape tf_shape(dims); Tensor* tf_output_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &tf_output_tensor)); - + 
tf_output_tensors.push_back(tf_output_tensor); // Make sure the nGraph-inferred element type agrees with what TensorFlow // expected. ng::element::Type expected_elem_type; @@ -569,26 +568,33 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { ctx, ng_element_type == expected_elem_type, errors::Internal("Element type inferred by nGraph does not match " "the element type expected by TensorFlow")); - event_copy_prep->Stop(); - output_copy_events.push_back(std::move(event_copy_prep)); + } + event_allocate_tf_output_tensors.Stop(); + ngraph::Event::write_trace(event_allocate_tf_output_tensors); - // Now copy the nGraph Tensor to Host Tensor - std::unique_ptr event_copy_d2h( - new ngraph::Event("Device to Host Copy", "", "")); - void* dst_ptr = DMAHelper::base(tf_output_tensor); + // Copy Tensors that are required + ngraph::Event event_read_ng_tensors("Read NG Tensor", "", ""); + std::vector> output_copy_events; - ng_outputs[i]->read( - dst_ptr, ng_outputs[i]->get_element_count() * ng_element_type.size()); + auto output_indexes_to_be_copied = + tensor_manager->GetOutputIndexesThatNeedCopy(); + for (auto output_index : output_indexes_to_be_copied) { + // Copy the nGraph Tensor to Host Tensor + std::unique_ptr event_copy_d2h( + new ngraph::Event("Output_" + std::to_string(output_index), "", "")); + ng_outputs[output_index]->read( + tf_output_tensors[output_index], + ng_outputs[output_index]->get_element_count() * + ng_outputs[output_index]->get_element_type().size()); event_copy_d2h->Stop(); output_copy_events.push_back(std::move(event_copy_d2h)); } - for (auto& next : output_copy_events) { ngraph::Event::write_trace(*next.get()); } - event_copy_output_tensor.Stop(); - ngraph::Event::write_trace(event_copy_output_tensor); + event_read_ng_tensors.Stop(); + ngraph::Event::write_trace(event_read_ng_tensors); // Now return them to the cache ngraph::Event event_return_tensor("Return Tensor", "", ""); From ff18bfdc1ac00d4d814eb09bc579f7f73a174240 
Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 16 Dec 2019 15:07:49 -0800 Subject: [PATCH 43/67] Read only required outputs --- ngraph_bridge/ngraph_encapsulate_op.cc | 40 +++++++++++++++----------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 9a48d8c92..323203290 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -540,12 +540,11 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { ngraph::Event::write_trace(event_execute_graph); // Now prepare the output - ngraph::Event event_copy_output_tensor("Copy Output Tensor", "", ""); - - std::vector> output_copy_events; + // Allocate TF Tensors + vector tf_output_tensors; + ngraph::Event event_allocate_tf_output_tensors("Allocate TF Output Tensor", + "", ""); for (auto i = 0; i < ng_exec->get_results().size(); i++) { - std::unique_ptr event_copy_prep( - new ngraph::Event("Copy Prep", "", "")); auto ng_element = ng_exec->get_results()[i]; auto ng_shape = ng_element->get_shape(); auto ng_element_type = ng_element->get_element_type(); @@ -558,7 +557,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { TensorShape tf_shape(dims); Tensor* tf_output_tensor = nullptr; OP_REQUIRES_OK(ctx, ctx->allocate_output(i, tf_shape, &tf_output_tensor)); - + tf_output_tensors.push_back(tf_output_tensor); // Make sure the nGraph-inferred element type agrees with what TensorFlow // expected. 
ng::element::Type expected_elem_type; @@ -569,26 +568,33 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { ctx, ng_element_type == expected_elem_type, errors::Internal("Element type inferred by nGraph does not match " "the element type expected by TensorFlow")); - event_copy_prep->Stop(); - output_copy_events.push_back(std::move(event_copy_prep)); + } + event_allocate_tf_output_tensors.Stop(); + ngraph::Event::write_trace(event_allocate_tf_output_tensors); - // Now copy the nGraph Tensor to Host Tensor - std::unique_ptr event_copy_d2h( - new ngraph::Event("Device to Host Copy", "", "")); - void* dst_ptr = DMAHelper::base(tf_output_tensor); + // Copy Tensors that are required + ngraph::Event event_read_ng_tensors("Read NG Tensor", "", ""); + std::vector> output_copy_events; - ng_outputs[i]->read( - dst_ptr, ng_outputs[i]->get_element_count() * ng_element_type.size()); + auto output_indexes_to_be_copied = + tensor_manager->GetOutputIndexesThatNeedCopy(); + for (auto output_index : output_indexes_to_be_copied) { + // Copy the nGraph Tensor to Host Tensor + std::unique_ptr event_copy_d2h( + new ngraph::Event("Output_" + std::to_string(output_index), "", "")); + void* dst_ptr = DMAHelper::base(tf_output_tensors[output_index]); + ng_outputs[output_index]->read( + dst_ptr, ng_outputs[output_index]->get_element_count() * + ng_outputs[output_index]->get_element_type().size()); event_copy_d2h->Stop(); output_copy_events.push_back(std::move(event_copy_d2h)); } - for (auto& next : output_copy_events) { ngraph::Event::write_trace(*next.get()); } - event_copy_output_tensor.Stop(); - ngraph::Event::write_trace(event_copy_output_tensor); + event_read_ng_tensors.Stop(); + ngraph::Event::write_trace(event_read_ng_tensors); // Now return them to the cache ngraph::Event event_return_tensor("Return Tensor", "", ""); From 3e3d887375399a64fb85c4ea30bcdcf1f55be725 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 16 Dec 2019 16:00:03 -0800 Subject: 
[PATCH 44/67] Var uses Parallel Executor --- ngraph_bridge/ngraph_encapsulate_op.cc | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 41106b3fe..029118815 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -88,13 +88,14 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) ctx, backend != nullptr, errors::Internal("Cannot get the backend object for BE: ", be_name)); -// If we have the VARIABLE capture on then we can't use the -// parallel executor until that support is added. -#if !defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) + // // If we have the VARIABLE capture on then we can't use the + // // parallel executor until that support is added. + // #if !defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) + // m_use_parallel_executor = backend->executable_can_create_tensors(); + // #else + // m_use_parallel_executor = false; + // #endif m_use_parallel_executor = backend->executable_can_create_tensors(); -#else - m_use_parallel_executor = false; -#endif // Override the switch for debugging/testing if (std::getenv("NGRAPH_TF_USE_LEGACY_EXECUTOR") != nullptr) { From 02888d3bb47e4fb7bef2336b45af64a81bafc98c Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 16 Dec 2019 17:44:44 -0800 Subject: [PATCH 45/67] Implemented IOTensorsReadyForExec --- ngraph_bridge/ngraph_encapsulate_op.cc | 18 +++--- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 65 +++++++++++++++++++- ngraph_bridge/ngraph_encapsulate_op_utils.h | 16 ++++- 3 files changed, 86 insertions(+), 13 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 029118815..637c249c6 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -500,11 +500,15 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { 
vector> ng_inputs(num_of_inputs); vector> ng_outputs(num_of_outputs); - // All inputs and outputs are pipelined. - // Of all these pipelined inputs some are prefetched - // TODO: Fit in variables - ng_inputs = get<1>(pipelined_io_tensors); - ng_outputs = get<2>(pipelined_io_tensors); + OP_REQUIRES_OK(ctx, GetIOTensorsReadyForExecution( + ctx, tensor_manager, get<1>(pipelined_io_tensors), + get<2>(pipelined_io_tensors), ng_inputs, ng_outputs)); + + // // All inputs and outputs are pipelined. + // // Of all these pipelined inputs some are prefetched + // // TODO: Fit in variables + // ng_inputs = get<1>(pipelined_io_tensors); + // ng_outputs = get<2>(pipelined_io_tensors); // And execute ngraph::Event event_execute_graph("Execute Graph", "", ""); @@ -581,8 +585,8 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { tensor_manager->GetOutputIndexesThatNeedCopy(); for (auto output_index : output_indexes_to_be_copied) { // Copy the nGraph Tensor to Host Tensor - std::unique_ptr event_copy_d2h( - new ngraph::Event("Output_" + std::to_string(output_index), "", "")); + std::unique_ptr event_copy_d2h(new ngraph::Event( + "D2H_Output_" + std::to_string(output_index), "", "")); ng_outputs[output_index]->read( tf_output_tensors[output_index], ng_outputs[output_index]->get_element_count() * diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index 2bb71db76..eddd968e2 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -18,6 +18,10 @@ #include "ngraph_bridge/ngraph_prefetch_shared_data.h" #include "ngraph_bridge/ngraph_utils.h" +#if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) +#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" +#endif + using namespace std; namespace tensorflow { @@ -25,9 +29,9 @@ namespace tensorflow { namespace ngraph_bridge { Status GetPipelinedIOTensorsReadyForExecution( - OpKernelContext* ctx, 
std::vector& tf_input_tensors, - shared_ptr& pipelined_tensor_store, - shared_ptr& tensor_manager, + OpKernelContext* ctx, const vector& tf_input_tensors, + const shared_ptr& pipelined_tensor_store, + const shared_ptr& tensor_manager, std::tuple& pipelined_io_tensors) { auto io_tensors = pipelined_tensor_store->get_tensors(); @@ -222,5 +226,60 @@ Status GetPipelinedIOTensorsReadyForExecution( return Status::OK(); } +Status GetTensorFromContext(OpKernelContext* ctx, const string& shared_name, + shared_ptr& ng_tensor) { + // Get shared name from tensor manager + NGraphVar* var; + TF_RETURN_IF_ERROR(ctx->resource_manager()->Lookup( + ctx->resource_manager()->default_container(), shared_name, &var)); + ng_tensor = var->ng_tensor(); + var->Unref(); + return Status::OK(); +} + +Status GetIOTensorsReadyForExecution( + OpKernelContext* ctx, const shared_ptr& tensor_manager, + const PipelinedTensorVector& pipelined_in_tensors, + const PipelinedTensorVector& pipelined_out_tensors, + vector>& ng_inputs, + vector>& ng_outputs) { + // Get Variables that are inputs + auto var_input_indexes = tensor_manager->GetInputIndexesFedByVariables(); + for (int input_index : var_input_indexes) { + string shared_name; + TF_RETURN_IF_ERROR( + tensor_manager->GetInputVariableSharedName(input_index, &shared_name)); + TF_RETURN_IF_ERROR( + GetTensorFromContext(ctx, shared_name, ng_inputs[input_index])); + } + + // Get Variables that are outputs + auto var_output_indexes = + tensor_manager->GetOutputIndexesAssigningVariables(); + for (int output_index : var_output_indexes) { + string shared_name; + TF_RETURN_IF_ERROR(tensor_manager->GetOutputVariableSharedName( + output_index, &shared_name)); + TF_RETURN_IF_ERROR( + GetTensorFromContext(ctx, shared_name, ng_outputs[output_index])); + } + + // Fit Pipelined Input Tensors + auto pipelined_input_indexes = tensor_manager->GetPipelinedInputIndexes(); + for (int i = 0; i < pipelined_input_indexes.size(); i++) { + int input_index = 
pipelined_input_indexes[i]; + ng_inputs[input_index] = pipelined_in_tensors[i]; + } + + // Fit Pipelined Output Tensors + auto pipelined_output_indexes = tensor_manager->GetPipelinedOutputIndexes(); + for (int i = 0; i < pipelined_output_indexes.size(); i++) { + int output_index = pipelined_output_indexes[i]; + ng_outputs[output_index] = pipelined_out_tensors[i]; + } + + return Status::OK(); +} + } // namespace ngraph_bridge } // namespace tensorflow diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.h b/ngraph_bridge/ngraph_encapsulate_op_utils.h index 7f48eb09c..64b2f432a 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.h +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.h @@ -46,12 +46,22 @@ namespace ngraph_bridge { // Status GetPipelinedIOTensorsReadyForExecution( - OpKernelContext* ctx, vector& tf_input_tensors, - shared_ptr& pipelined_tensor_store, - shared_ptr& tensor_manager, + OpKernelContext* ctx, const vector& tf_input_tensors, + const shared_ptr& pipelined_tensor_store, + const shared_ptr& tensor_manager, tuple& pipelined_io_tensors); +Status GetIOTensorsReadyForExecution( + OpKernelContext* ctx, const shared_ptr& tensor_manager, + const PipelinedTensorVector& pipelined_in_tensors, + const PipelinedTensorVector& pipelined_out_tensors, + vector>& ng_inputs, + vector>& ng_outputs); + +Status GetTensorFromContext(OpKernelContext* ctx, const string& shared_name, + shared_ptr& ng_tensor); + } // namespace ngraph_bridge } // namespace tensorflow From ca63606a08c1c82c4a816e2d634612c225045de5 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 16 Dec 2019 17:51:50 -0800 Subject: [PATCH 46/67] Sync for output tensors --- ngraph_bridge/ngraph_encapsulate_op.cc | 4 ++- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 28 +++++++++++++++++++- ngraph_bridge/ngraph_encapsulate_op_utils.h | 7 ++++- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 
637c249c6..9100f0f8a 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -597,10 +597,12 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { for (auto& next : output_copy_events) { ngraph::Event::write_trace(*next.get()); } - event_read_ng_tensors.Stop(); ngraph::Event::write_trace(event_read_ng_tensors); + // Synch Var Output Tensors as required + OP_REQUIRES_OK(ctx, SyncOutputVarTensors(ctx, tensor_manager)); + // Now return them to the cache ngraph::Event event_return_tensor("Return Tensor", "", ""); pipelined_tensor_store->return_tensors(current_iter_pipeline_depth); diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index eddd968e2..6cd009bee 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -226,7 +226,8 @@ Status GetPipelinedIOTensorsReadyForExecution( return Status::OK(); } -Status GetTensorFromContext(OpKernelContext* ctx, const string& shared_name, +Status GetTensorFromContext(const OpKernelContext* ctx, + const string& shared_name, shared_ptr& ng_tensor) { // Get shared name from tensor manager NGraphVar* var; @@ -281,5 +282,30 @@ Status GetIOTensorsReadyForExecution( return Status::OK(); } +Status SyncOutputVarTensors( + const OpKernelContext* ctx, + const shared_ptr& tensor_manager) { + // Get Variables that are outputs + auto var_output_indexes = + tensor_manager->GetOutputIndexesAssigningVariables(); + for (int output_index : var_output_indexes) { + bool copy_to_tf; + TF_RETURN_IF_ERROR( + tensor_manager->GetOutputVariableCopyToTF(output_index, ©_to_tf)); + + if (copy_to_tf) { + string shared_name; + TF_RETURN_IF_ERROR(tensor_manager->GetOutputVariableSharedName( + output_index, &shared_name)); + // Get shared name from tensor manager + NGraphVar* var; + TF_RETURN_IF_ERROR(ctx->resource_manager()->Lookup( + ctx->resource_manager()->default_container(), 
shared_name, &var)); + var->copy_ng_to_tf(); + var->Unref(); + } + } +} + } // namespace ngraph_bridge } // namespace tensorflow diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.h b/ngraph_bridge/ngraph_encapsulate_op_utils.h index 64b2f432a..956a2af50 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.h +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.h @@ -59,9 +59,14 @@ Status GetIOTensorsReadyForExecution( vector>& ng_inputs, vector>& ng_outputs); -Status GetTensorFromContext(OpKernelContext* ctx, const string& shared_name, +Status GetTensorFromContext(const OpKernelContext* ctx, + const string& shared_name, shared_ptr& ng_tensor); +Status SyncOutputVarTensors( + const OpKernelContext* ctx, + const shared_ptr& tensor_manager); + } // namespace ngraph_bridge } // namespace tensorflow From 472b8d8ef5a4c6acd69d814155a3af7ec79112c0 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 16 Dec 2019 17:54:57 -0800 Subject: [PATCH 47/67] Fixed output --- ngraph_bridge/ngraph_encapsulate_op.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 9100f0f8a..9cf0b2b8b 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -587,10 +587,10 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { // Copy the nGraph Tensor to Host Tensor std::unique_ptr event_copy_d2h(new ngraph::Event( "D2H_Output_" + std::to_string(output_index), "", "")); + void* dst_ptr = (void*)DMAHelper::base(tf_output_tensors[output_index]); ng_outputs[output_index]->read( - tf_output_tensors[output_index], - ng_outputs[output_index]->get_element_count() * - ng_outputs[output_index]->get_element_type().size()); + dst_ptr, ng_outputs[output_index]->get_element_count() * + ng_outputs[output_index]->get_element_type().size()); event_copy_d2h->Stop(); output_copy_events.push_back(std::move(event_copy_d2h)); } From 
5576a4714f9b066fc86ea619b71f25f23ad5996c Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 16 Dec 2019 18:52:25 -0800 Subject: [PATCH 48/67] For non var build --- bazel/BUILD | 2 ++ ngraph_bridge/CMakeLists.txt | 2 +- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 4 +--- ngraph_bridge/{enable_variable_ops => }/ngraph_var.cc | 2 +- ngraph_bridge/{enable_variable_ops => }/ngraph_var.h | 0 5 files changed, 5 insertions(+), 5 deletions(-) rename ngraph_bridge/{enable_variable_ops => }/ngraph_var.cc (98%) rename ngraph_bridge/{enable_variable_ops => }/ngraph_var.h (100%) diff --git a/bazel/BUILD b/bazel/BUILD index 7028b6a95..034ff0dec 100644 --- a/bazel/BUILD +++ b/bazel/BUILD @@ -48,6 +48,7 @@ cc_library( "ngraph_bridge/ngraph_tensor_manager.h", "ngraph_bridge/ngraph_timer.h", "ngraph_bridge/ngraph_utils.h", + "ngraph_bridge/ngraph_var.h", "ngraph_bridge/ngraph_version_utils.h", "ngraph_bridge/tf_deadness_analysis.h", "ngraph_bridge/tf_graphcycles.h", @@ -92,6 +93,7 @@ cc_library( "ngraph_bridge/ngraph_tensor_manager.cc", "ngraph_bridge/ngraph_tracked_variable.cc", "ngraph_bridge/ngraph_utils.cc", + "ngraph_bridge/ngraph_var.cc", "ngraph_bridge/tf_deadness_analysis.cc", "ngraph_bridge/tf_graphcycles.cc", "ngraph_bridge/ops/ngraph_ops.cc", diff --git a/ngraph_bridge/CMakeLists.txt b/ngraph_bridge/CMakeLists.txt index 18d218dad..eb104ae3b 100644 --- a/ngraph_bridge/CMakeLists.txt +++ b/ngraph_bridge/CMakeLists.txt @@ -57,6 +57,7 @@ set(SRC ngraph_rewrite_pass.cc ngraph_tensor_manager.cc ngraph_tracked_variable.cc + ngraph_var.cc ngraph_utils.cc tf_graphcycles.cc tf_deadness_analysis.cc @@ -86,7 +87,6 @@ if(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) list(APPEND SRC enable_variable_ops/ngraph_tracked_variable.cc) # new files - list(APPEND SRC enable_variable_ops/ngraph_var.cc) list(APPEND SRC enable_variable_ops/ngraph_assign_op.cc) list(APPEND SRC enable_variable_ops/ngraph_enter_in_catalog.cc) list(APPEND SRC enable_variable_ops/ngraph_remove_ngraphassigns.cc) 
diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index 6cd009bee..dac9cd83e 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -18,9 +18,7 @@ #include "ngraph_bridge/ngraph_prefetch_shared_data.h" #include "ngraph_bridge/ngraph_utils.h" -#if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" -#endif +#include "ngraph_bridge/ngraph_var.h" using namespace std; diff --git a/ngraph_bridge/enable_variable_ops/ngraph_var.cc b/ngraph_bridge/ngraph_var.cc similarity index 98% rename from ngraph_bridge/enable_variable_ops/ngraph_var.cc rename to ngraph_bridge/ngraph_var.cc index efab9e7c0..1fa6001bf 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_var.cc +++ b/ngraph_bridge/ngraph_var.cc @@ -24,10 +24,10 @@ #include "ngraph/event_tracing.hpp" #include "ngraph/runtime/backend.hpp" -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/ngraph_backend_manager.h" #include "ngraph_bridge/ngraph_freshness_tracker.h" #include "ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" using namespace std; namespace ng = ngraph; diff --git a/ngraph_bridge/enable_variable_ops/ngraph_var.h b/ngraph_bridge/ngraph_var.h similarity index 100% rename from ngraph_bridge/enable_variable_ops/ngraph_var.h rename to ngraph_bridge/ngraph_var.h From b1309172ab0dbe6c692802311cb7537b82cc467e Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Mon, 16 Dec 2019 19:33:22 -0800 Subject: [PATCH 49/67] Solved build and fix --- .../enable_variable_ops/ngraph_assign_op.cc | 2 +- .../enable_variable_ops/ngraph_tracked_variable.cc | 2 +- .../enable_variable_ops/ngraph_variable_modifiers.cc | 2 +- .../ngraph_variable_update_ng_tensor_op.cc | 2 +- ngraph_bridge/ngraph_encapsulate_impl.cc | 2 +- ngraph_bridge/ngraph_encapsulate_op.cc | 12 +++++++++++- 
ngraph_bridge/ngraph_encapsulate_op_utils.cc | 6 ++++++ ngraph_bridge/ngraph_executor.cc | 2 +- test/graph_rewrites/test_ng_var_update_ng_tensor.cc | 2 +- test/test_ng_var_update_ng_tensor_kernel.cc | 2 +- 10 files changed, 25 insertions(+), 9 deletions(-) diff --git a/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc b/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc index b9f041e8b..8eb1d5336 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc @@ -25,11 +25,11 @@ #include "ngraph/event_tracing.hpp" #include "ngraph/runtime/backend.hpp" -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/ngraph_catalog.h" #include "ngraph_bridge/ngraph_freshness_tracker.h" #include "ngraph_bridge/ngraph_timer.h" #include "ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" using namespace std; namespace ng = ngraph; diff --git a/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc b/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc index c034d13c7..f821cadbc 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc @@ -23,11 +23,11 @@ #include "ngraph/event_tracing.hpp" #include "ngraph/runtime/backend.hpp" -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/ngraph_backend_manager.h" #include "ngraph_bridge/ngraph_catalog.h" #include "ngraph_bridge/ngraph_freshness_tracker.h" #include "ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" using namespace std; namespace ng = ngraph; diff --git a/ngraph_bridge/enable_variable_ops/ngraph_variable_modifiers.cc b/ngraph_bridge/enable_variable_ops/ngraph_variable_modifiers.cc index 5fc190bea..376a596a9 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_variable_modifiers.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_variable_modifiers.cc @@ -26,12 +26,12 
@@ #include "ngraph/runtime/backend.hpp" -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/ngraph_backend_manager.h" #include "ngraph_bridge/ngraph_catalog.h" #include "ngraph_bridge/ngraph_freshness_tracker.h" #include "ngraph_bridge/ngraph_timer.h" #include "ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" using namespace std; namespace ng = ngraph; diff --git a/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc b/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc index fdb432f79..faee2334d 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc @@ -24,10 +24,10 @@ #include "ngraph/event_tracing.hpp" -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.h" #include "ngraph_bridge/ngraph_timer.h" #include "ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" using namespace std; namespace ng = ngraph; diff --git a/ngraph_bridge/ngraph_encapsulate_impl.cc b/ngraph_bridge/ngraph_encapsulate_impl.cc index 7823f0a7d..f2ddf1ecd 100644 --- a/ngraph_bridge/ngraph_encapsulate_impl.cc +++ b/ngraph_bridge/ngraph_encapsulate_impl.cc @@ -45,8 +45,8 @@ #include "ngraph_bridge/ngraph_timer.h" #include "ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/ngraph_catalog.h" #endif diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 9cf0b2b8b..bc1f9d49e 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -49,9 +49,9 @@ #include "ngraph_bridge/ngraph_prefetch_shared_data.h" #include "ngraph_bridge/ngraph_timer.h" #include 
"ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/ngraph_catalog.h" #endif @@ -546,6 +546,8 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { // Now prepare the output // Allocate TF Tensors + NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute Allocating TF Output Tensors " + << m_parallel_executor->GetNgraphClusterId(); vector tf_output_tensors; ngraph::Event event_allocate_tf_output_tensors("Allocate TF Output Tensor", "", ""); @@ -578,6 +580,9 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { ngraph::Event::write_trace(event_allocate_tf_output_tensors); // Copy Tensors that are required + NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute Read NG Output Tensors " + << m_parallel_executor->GetNgraphClusterId(); + ngraph::Event event_read_ng_tensors("Read NG Tensor", "", ""); std::vector> output_copy_events; @@ -601,9 +606,14 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { ngraph::Event::write_trace(event_read_ng_tensors); // Synch Var Output Tensors as required + NGRAPH_VLOG(4) + << "NGraphEncapsulateOp::Compute Sync NG Output Variable Tensors " + << m_parallel_executor->GetNgraphClusterId(); OP_REQUIRES_OK(ctx, SyncOutputVarTensors(ctx, tensor_manager)); // Now return them to the cache + NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Returning Tensors " + << m_parallel_executor->GetNgraphClusterId(); ngraph::Event event_return_tensor("Return Tensor", "", ""); pipelined_tensor_store->return_tensors(current_iter_pipeline_depth); diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index dac9cd83e..472a72d1e 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -286,12 +286,16 @@ Status SyncOutputVarTensors( // Get 
Variables that are outputs auto var_output_indexes = tensor_manager->GetOutputIndexesAssigningVariables(); + NGRAPH_VLOG(4) << "output indexes size " << var_output_indexes.size(); + for (int output_index : var_output_indexes) { + NGRAPH_VLOG(4) << "Checking Sync For " << output_index; bool copy_to_tf; TF_RETURN_IF_ERROR( tensor_manager->GetOutputVariableCopyToTF(output_index, ©_to_tf)); if (copy_to_tf) { + NGRAPH_VLOG(4) << "Sync NG Output Variable Tensors " << output_index; string shared_name; TF_RETURN_IF_ERROR(tensor_manager->GetOutputVariableSharedName( output_index, &shared_name)); @@ -301,8 +305,10 @@ Status SyncOutputVarTensors( ctx->resource_manager()->default_container(), shared_name, &var)); var->copy_ng_to_tf(); var->Unref(); + NGRAPH_VLOG(4) << "Sync Completed " << output_index; } } + return Status::OK(); } } // namespace ngraph_bridge diff --git a/ngraph_bridge/ngraph_executor.cc b/ngraph_bridge/ngraph_executor.cc index 37e1b8b40..7d4fe2c2a 100644 --- a/ngraph_bridge/ngraph_executor.cc +++ b/ngraph_bridge/ngraph_executor.cc @@ -43,9 +43,9 @@ #include "ngraph_bridge/ngraph_mark_for_clustering.h" #include "ngraph_bridge/ngraph_timer.h" #include "ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/ngraph_catalog.h" #endif diff --git a/test/graph_rewrites/test_ng_var_update_ng_tensor.cc b/test/graph_rewrites/test_ng_var_update_ng_tensor.cc index 0af2c7a57..924c54266 100644 --- a/test/graph_rewrites/test_ng_var_update_ng_tensor.cc +++ b/test/graph_rewrites/test_ng_var_update_ng_tensor.cc @@ -23,10 +23,10 @@ #include "tensorflow/core/platform/test.h" #include "logging/tf_graph_writer.h" -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.h" #include "ngraph_bridge/ngraph_rewrite_for_tracking.h" #include 
"ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" #include "test/test_utilities.h" namespace tensorflow { diff --git a/test/test_ng_var_update_ng_tensor_kernel.cc b/test/test_ng_var_update_ng_tensor_kernel.cc index 51742fcc9..4612d156b 100644 --- a/test/test_ng_var_update_ng_tensor_kernel.cc +++ b/test/test_ng_var_update_ng_tensor_kernel.cc @@ -30,9 +30,9 @@ #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" -#include "ngraph_bridge/enable_variable_ops/ngraph_var.h" #include "ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.h" #include "ngraph_bridge/ngraph_utils.h" +#include "ngraph_bridge/ngraph_var.h" #include "test/test_utilities.h" #include "test/tf_fake_input.h" From f821bace2df1513a3cf6cfedd24500083610fc15 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Tue, 17 Dec 2019 10:04:24 -0800 Subject: [PATCH 50/67] Fix test_flib --- .../ngraph_enter_in_catalog.cc | 14 ++--- ngraph_bridge/ngraph_tensor_manager.cc | 61 +++++++++++++++++++ ngraph_bridge/ngraph_tensor_manager.h | 2 + test/python/test_flib.py | 4 ++ 4 files changed, 73 insertions(+), 8 deletions(-) diff --git a/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc b/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc index a456ef6e8..d730cf79d 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc @@ -161,14 +161,12 @@ Status EnterInCatalog(Graph* graph, int graph_id) { } // are there indexes that need copy - if (op_index_to_copy.size() > 0) { - try { - NGraphCatalog::AddToEncapOutputCopyIndexesMap(graph_id, node->name(), - op_index_to_copy); - } catch (const std::exception& exp) { - return errors::Internal( - "Caught exception while entering in catalog: ", exp.what(), "\n"); - } + try { + NGraphCatalog::AddToEncapOutputCopyIndexesMap(graph_id, node->name(), + op_index_to_copy); + } catch (const std::exception& 
exp) { + return errors::Internal("Caught exception while entering in catalog: ", + exp.what(), "\n"); } } // end of node is type NGraphEncapsulate diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 52a3e1bfe..11e0c5579 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -41,6 +41,9 @@ NGraphTensorManager::NGraphTensorManager(const string ng_encap_node_name, } void NGraphTensorManager::Initialize() { + cout << "Number of inputs " << m_number_of_inputs << endl; + cout << "Number of outputs " << m_number_of_outputs << endl; + #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) // input variables book-keeping @@ -86,6 +89,16 @@ void NGraphTensorManager::Initialize() { m_output_indexes_that_need_copy.push_back(index); } } + + // For graphs that were run through AOT + // Graph rewrite is not done + if (!NGraphCatalog::EncapOutputNeedsCopy(m_ng_encap_graph_id, + m_ng_encap_node_name)) { + m_output_indexes_that_need_copy.resize(m_number_of_outputs); + iota(begin(m_output_indexes_that_need_copy), + end(m_output_indexes_that_need_copy), 0); + } + #else m_output_indexes_that_need_copy.resize(m_number_of_outputs); iota(begin(m_output_indexes_that_need_copy), @@ -127,6 +140,54 @@ void NGraphTensorManager::Initialize() { m_pipelined_input_indexes_that_are_prefetched); m_pipelined_not_prefetched_input_indexes = FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes); + Print(); +} + +void NGraphTensorManager::Print() { + cout << "Input indexes from Variables" << endl; + for (int index : m_input_indexes_from_variables) { + cout << index << endl; + } + + cout << "Input indexes to Assigns" << endl; + for (int index : m_output_indexes_assigning_variable) { + cout << index << endl; + } + + cout << "Input need copy" << endl; + for (int index : m_output_indexes_that_need_copy) { + cout << index << endl; + } + + cout << "Input pipelined" << endl; + for (int index : 
m_pipelined_input_indexes) { + cout << index << endl; + } + + cout << "Output pipelined" << endl; + for (int index : m_pipelined_output_indexes) { + cout << index << endl; + } + + cout << "prefetched " << endl; + for (int index : m_prefetched_input_indexes) { + cout << index << endl; + } + + cout << "not prefetched " << endl; + for (int index : m_pipelined_not_prefetched_input_indexes) { + cout << index << endl; + } + + cout << "pipelined prefetched " << endl; + for (int index : m_pipelined_input_indexes_that_are_prefetched) { + cout << index << endl; + } + + cout << "pipelined not prefetched " << endl; + for (int index : m_pipelined_input_indexes_that_are_not_prefetched) { + cout << index << endl; + } } //--------------------------------------------------------------------------- diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index d0e394dcd..5f0442043 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -104,6 +104,8 @@ class NGraphTensorManager { Status GetOutputVariableCopyToTF(const int& output_index, bool* output_var_copy_to_tf); + void Print(); + private: void Initialize(); string m_ng_encap_node_name; diff --git a/test/python/test_flib.py b/test/python/test_flib.py index 079e34449..7da55a85f 100644 --- a/test/python/test_flib.py +++ b/test/python/test_flib.py @@ -46,6 +46,10 @@ def test_flib_1(self): res1 = self.with_ngraph(sess_fn) res2 = self.without_ngraph(sess_fn) + print('res1') + print(res1) + print('res2') + print(res2) exp = [np.full((2, 3), 3.0), np.full((2, 3), 0.95257413)] # Note both run on Host (because NgraphEncapsulate can only run on host) assert np.isclose(res1, res2).all() From 1af0efad8a922377f7dbb609d09cf78fce594476 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 13:12:24 -0800 Subject: [PATCH 51/67] Removed Print function --- ngraph_bridge/ngraph_tensor_manager.cc | 30 -------------------------- 
ngraph_bridge/ngraph_tensor_manager.h | 2 -- 2 files changed, 32 deletions(-) diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 11e0c5579..3b342d17b 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -143,36 +143,6 @@ void NGraphTensorManager::Initialize() { Print(); } -void NGraphTensorManager::Print() { - cout << "Input indexes from Variables" << endl; - for (int index : m_input_indexes_from_variables) { - cout << index << endl; - } - - cout << "Input indexes to Assigns" << endl; - for (int index : m_output_indexes_assigning_variable) { - cout << index << endl; - } - - cout << "Input need copy" << endl; - for (int index : m_output_indexes_that_need_copy) { - cout << index << endl; - } - - cout << "Input pipelined" << endl; - for (int index : m_pipelined_input_indexes) { - cout << index << endl; - } - - cout << "Output pipelined" << endl; - for (int index : m_pipelined_output_indexes) { - cout << index << endl; - } - - cout << "prefetched " << endl; - for (int index : m_prefetched_input_indexes) { - cout << index << endl; - } cout << "not prefetched " << endl; for (int index : m_pipelined_not_prefetched_input_indexes) { diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index 5f0442043..d0e394dcd 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -104,8 +104,6 @@ class NGraphTensorManager { Status GetOutputVariableCopyToTF(const int& output_index, bool* output_var_copy_to_tf); - void Print(); - private: void Initialize(); string m_ng_encap_node_name; From fe5d3e8f943207401d4f5b30bc81b3491d442f35 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 13:34:00 -0800 Subject: [PATCH 52/67] Added Traces to Encap. 
Some clean up --- ngraph_bridge/ngraph_encapsulate_op.cc | 34 +++++++++++--------------- ngraph_bridge/ngraph_tensor_manager.cc | 18 -------------- 2 files changed, 14 insertions(+), 38 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index bc1f9d49e..12141bf04 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -88,13 +88,7 @@ NGraphEncapsulateOp::NGraphEncapsulateOp(OpKernelConstruction* ctx) ctx, backend != nullptr, errors::Internal("Cannot get the backend object for BE: ", be_name)); - // // If we have the VARIABLE capture on then we can't use the - // // parallel executor until that support is added. - // #if !defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) - // m_use_parallel_executor = backend->executable_can_create_tensors(); - // #else - // m_use_parallel_executor = false; - // #endif + // If backend executable can create tensors we use parallel executor m_use_parallel_executor = backend->executable_can_create_tensors(); // Override the switch for debugging/testing @@ -460,6 +454,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { m_parallel_executor->GetTensorPipelineDepth())); // Get Tensor Manager and some error checking + ngraph::Event event_prepare_ng_tensors("Prepare NG In/Out Tensors", "", ""); auto tensor_manager = m_parallel_executor->GetTensorManager(); int num_of_inputs = tensor_manager->GetNumberOfInputs(); int num_of_outputs = tensor_manager->GetNumberOfOutputs(); @@ -500,15 +495,13 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { vector> ng_inputs(num_of_inputs); vector> ng_outputs(num_of_outputs); + // Prepare NG Input Output Tensors + // Retrofit Variable tensors and pipelined tensors to ng_input and ng_outputs OP_REQUIRES_OK(ctx, GetIOTensorsReadyForExecution( ctx, tensor_manager, get<1>(pipelined_io_tensors), get<2>(pipelined_io_tensors), ng_inputs, ng_outputs)); - - 
// // All inputs and outputs are pipelined. - // // Of all these pipelined inputs some are prefetched - // // TODO: Fit in variables - // ng_inputs = get<1>(pipelined_io_tensors); - // ng_outputs = get<2>(pipelined_io_tensors); + event_prepare_ng_tensors.Stop(); + ngraph::Event::write_trace(event_prepare_ng_tensors); // And execute ngraph::Event event_execute_graph("Execute Graph", "", ""); @@ -548,9 +541,10 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { // Allocate TF Tensors NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute Allocating TF Output Tensors " << m_parallel_executor->GetNgraphClusterId(); + + ngraph::Event event_prepare_tf_output_tensors("Prepare TF Output Tensor", "", + ""); vector tf_output_tensors; - ngraph::Event event_allocate_tf_output_tensors("Allocate TF Output Tensor", - "", ""); for (auto i = 0; i < ng_exec->get_results().size(); i++) { auto ng_element = ng_exec->get_results()[i]; auto ng_shape = ng_element->get_shape(); @@ -576,14 +570,11 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { errors::Internal("Element type inferred by nGraph does not match " "the element type expected by TensorFlow")); } - event_allocate_tf_output_tensors.Stop(); - ngraph::Event::write_trace(event_allocate_tf_output_tensors); // Copy Tensors that are required NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute Read NG Output Tensors " << m_parallel_executor->GetNgraphClusterId(); - ngraph::Event event_read_ng_tensors("Read NG Tensor", "", ""); std::vector> output_copy_events; auto output_indexes_to_be_copied = @@ -602,14 +593,17 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { for (auto& next : output_copy_events) { ngraph::Event::write_trace(*next.get()); } - event_read_ng_tensors.Stop(); - ngraph::Event::write_trace(event_read_ng_tensors); + event_prepare_tf_output_tensors.Stop(); + ngraph::Event::write_trace(event_prepare_tf_output_tensors); // Synch Var Output 
Tensors as required NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute Sync NG Output Variable Tensors " << m_parallel_executor->GetNgraphClusterId(); + ngraph::Event event_update_ngvar_tensors("Update NGVar Tensors", "", ""); OP_REQUIRES_OK(ctx, SyncOutputVarTensors(ctx, tensor_manager)); + event_update_ngvar_tensors.Stop(); + ngraph::Event::write_trace(event_update_ngvar_tensors); // Now return them to the cache NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Returning Tensors " diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 3b342d17b..ade24ede9 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -140,24 +140,6 @@ void NGraphTensorManager::Initialize() { m_pipelined_input_indexes_that_are_prefetched); m_pipelined_not_prefetched_input_indexes = FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes); - Print(); -} - - - cout << "not prefetched " << endl; - for (int index : m_pipelined_not_prefetched_input_indexes) { - cout << index << endl; - } - - cout << "pipelined prefetched " << endl; - for (int index : m_pipelined_input_indexes_that_are_prefetched) { - cout << index << endl; - } - - cout << "pipelined not prefetched " << endl; - for (int index : m_pipelined_input_indexes_that_are_not_prefetched) { - cout << index << endl; - } } //--------------------------------------------------------------------------- From 878572abeba251076ca1c7e23592ac1091379e51 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 14:07:33 -0800 Subject: [PATCH 53/67] Added comments, clean up, etc --- .../ngraph_enter_in_catalog.cc | 1 - ngraph_bridge/ngraph_encapsulate_op_utils.cc | 16 ++++++++++++++-- ngraph_bridge/ngraph_encapsulate_op_utils.h | 16 ++++++++++++++++ ngraph_bridge/ngraph_tensor_manager.cc | 7 ++----- test/python/test_flib.py | 5 +---- 5 files changed, 33 insertions(+), 12 deletions(-) diff --git 
a/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc b/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc index d730cf79d..c96a4932e 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_enter_in_catalog.cc @@ -160,7 +160,6 @@ Status EnterInCatalog(Graph* graph, int graph_id) { } } - // are there indexes that need copy try { NGraphCatalog::AddToEncapOutputCopyIndexesMap(graph_id, node->name(), op_index_to_copy); diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index 472a72d1e..34f4710eb 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -26,6 +26,9 @@ namespace tensorflow { namespace ngraph_bridge { +//--------------------------------------------------------------------------- +// GetPipelinedIOTensorsReadyForExecution +//--------------------------------------------------------------------------- Status GetPipelinedIOTensorsReadyForExecution( OpKernelContext* ctx, const vector& tf_input_tensors, const shared_ptr& pipelined_tensor_store, @@ -224,6 +227,9 @@ Status GetPipelinedIOTensorsReadyForExecution( return Status::OK(); } +//--------------------------------------------------------------------------- +// GetTensorFromContext +//--------------------------------------------------------------------------- Status GetTensorFromContext(const OpKernelContext* ctx, const string& shared_name, shared_ptr& ng_tensor) { @@ -236,6 +242,9 @@ Status GetTensorFromContext(const OpKernelContext* ctx, return Status::OK(); } +//--------------------------------------------------------------------------- +// GetIOTensorsReadyForExecution +//--------------------------------------------------------------------------- Status GetIOTensorsReadyForExecution( OpKernelContext* ctx, const shared_ptr& tensor_manager, const PipelinedTensorVector& pipelined_in_tensors, @@ -280,6 +289,9 @@ Status 
GetIOTensorsReadyForExecution( return Status::OK(); } +//--------------------------------------------------------------------------- +// SyncOutputVarTensors +//--------------------------------------------------------------------------- Status SyncOutputVarTensors( const OpKernelContext* ctx, const shared_ptr& tensor_manager) { @@ -289,20 +301,20 @@ Status SyncOutputVarTensors( NGRAPH_VLOG(4) << "output indexes size " << var_output_indexes.size(); for (int output_index : var_output_indexes) { - NGRAPH_VLOG(4) << "Checking Sync For " << output_index; bool copy_to_tf; TF_RETURN_IF_ERROR( tensor_manager->GetOutputVariableCopyToTF(output_index, ©_to_tf)); if (copy_to_tf) { NGRAPH_VLOG(4) << "Sync NG Output Variable Tensors " << output_index; + // Get shared name from tensor manager string shared_name; TF_RETURN_IF_ERROR(tensor_manager->GetOutputVariableSharedName( output_index, &shared_name)); - // Get shared name from tensor manager NGraphVar* var; TF_RETURN_IF_ERROR(ctx->resource_manager()->Lookup( ctx->resource_manager()->default_container(), shared_name, &var)); + // update tensor var->copy_ng_to_tf(); var->Unref(); NGRAPH_VLOG(4) << "Sync Completed " << output_index; diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.h b/ngraph_bridge/ngraph_encapsulate_op_utils.h index 956a2af50..631171b4a 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.h +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.h @@ -52,6 +52,15 @@ Status GetPipelinedIOTensorsReadyForExecution( tuple& pipelined_io_tensors); +// Assembles the different types of input and output tensors into a right order +// Retrofit Variable tensors and pipelined tensors to ng_input and ng_outputs +// 1. For input indexes that are fed by variables, get the variable tensors from +// context +// 2. For output indexes that are updating variables, get the variable tensors +// from context +// This enable update-in-place +// 3. 
For input and output indexes that are pipelined, get the respective tensor +// Status GetIOTensorsReadyForExecution( OpKernelContext* ctx, const shared_ptr& tensor_manager, const PipelinedTensorVector& pipelined_in_tensors, @@ -59,10 +68,17 @@ Status GetIOTensorsReadyForExecution( vector>& ng_inputs, vector>& ng_outputs); +// Gets the Tensor from OpKernelContext's Container for the given shared_name Status GetTensorFromContext(const OpKernelContext* ctx, const string& shared_name, shared_ptr& ng_tensor); +// Encapsulate Op updates the NGVariable's device tensor in-place +// ie. the NGVariable's backend tensor is updated +// Some of these Variables may be required by the TF ops and they will use the +// host tensor +// These were marked as "copy-to-tf" True in the Rewrite Phase +// We will update these tensors here Status SyncOutputVarTensors( const OpKernelContext* ctx, const shared_ptr& tensor_manager); diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index ade24ede9..50c65dbc1 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -41,11 +41,7 @@ NGraphTensorManager::NGraphTensorManager(const string ng_encap_node_name, } void NGraphTensorManager::Initialize() { - cout << "Number of inputs " << m_number_of_inputs << endl; - cout << "Number of outputs " << m_number_of_outputs << endl; - #if defined(NGRAPH_TF_ENABLE_VARIABLES_AND_OPTIMIZERS) - // input variables book-keeping for (int index = 0; index < m_number_of_inputs; index++) { if (NGraphCatalog::ExistsInInputVariableSharedNameMap( @@ -91,7 +87,8 @@ void NGraphTensorManager::Initialize() { } // For graphs that were run through AOT - // Graph rewrite is not done + // Graph rewrite is not done, and there is no entry in catalog + // If there is not entry in catalog all outputs need to be copied if (!NGraphCatalog::EncapOutputNeedsCopy(m_ng_encap_graph_id, m_ng_encap_node_name)) { 
m_output_indexes_that_need_copy.resize(m_number_of_outputs); diff --git a/test/python/test_flib.py b/test/python/test_flib.py index 7da55a85f..f0c9b5b59 100644 --- a/test/python/test_flib.py +++ b/test/python/test_flib.py @@ -46,10 +46,7 @@ def test_flib_1(self): res1 = self.with_ngraph(sess_fn) res2 = self.without_ngraph(sess_fn) - print('res1') - print(res1) - print('res2') - print(res2) + exp = [np.full((2, 3), 3.0), np.full((2, 3), 0.95257413)] # Note both run on Host (because NgraphEncapsulate can only run on host) assert np.isclose(res1, res2).all() From 85882bdb8847658dcfff555cc93d53eda98e44c3 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 14:08:58 -0800 Subject: [PATCH 54/67] Removed ngraph-var in tracked_variable.cc --- ngraph_bridge/ngraph_tracked_variable.cc | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/ngraph_bridge/ngraph_tracked_variable.cc b/ngraph_bridge/ngraph_tracked_variable.cc index bf277b6c1..765e1a1a2 100644 --- a/ngraph_bridge/ngraph_tracked_variable.cc +++ b/ngraph_bridge/ngraph_tracked_variable.cc @@ -40,30 +40,6 @@ namespace ngraph_bridge { // (Changes: Renamed from LegacyVar, modified to take a TensorShape in // constructor.) -// THIS CLASS IS NOT BEING USED ANYWHERE -class NGraphVar : public ResourceBase { - public: - explicit NGraphVar(DataType dtype, TensorShape shape) - : tensor_(dtype, shape) {} - // Not copyable or movable. 
- NGraphVar(const NGraphVar&) = delete; - NGraphVar& operator=(const NGraphVar&) = delete; - - mutex* mu() { return &mu_; } - Tensor* tensor() { return &tensor_; } - - string DebugString() const override { - return strings::StrCat(DataTypeString(tensor_.dtype()), "/", - tensor_.shape().DebugString()); - } - - private: - mutex mu_; - Tensor tensor_; - - ~NGraphVar() override {} -}; - class NGraphVariableOp : public OpKernel { public: explicit NGraphVariableOp(OpKernelConstruction* context); From 983eb7b4d63c4c4e36f32b9c2eb0ba646d8a07d3 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 15:50:27 -0800 Subject: [PATCH 55/67] ngraph_tracked_variable.cc changes --- ngraph_bridge/ngraph_tracked_variable.cc | 30 +++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/ngraph_bridge/ngraph_tracked_variable.cc b/ngraph_bridge/ngraph_tracked_variable.cc index 765e1a1a2..b8dcf9325 100644 --- a/ngraph_bridge/ngraph_tracked_variable.cc +++ b/ngraph_bridge/ngraph_tracked_variable.cc @@ -40,6 +40,29 @@ namespace ngraph_bridge { // (Changes: Renamed from LegacyVar, modified to take a TensorShape in // constructor.) +// THIS CLASS IS NOT BEING USED ANYWHERE +class NGraphVar : public ResourceBase { + public: + explicit NGraphVar(DataType dtype, TensorShape shape) + : tensor_(dtype, shape) {} + // Not copyable or movable. 
+ NGraphVar(const NGraphVar&) = delete; + NGraphVar& operator=(const NGraphVar&) = delete; + + mutex* mu() { return &mu_; } + Tensor* tensor() { return &tensor_; } + + string DebugString() const override { + return strings::StrCat(DataTypeString(tensor_.dtype()), "/", + tensor_.shape().DebugString()); + } + + private: + mutex mu_; + Tensor tensor_; + ~NGraphVar() override {} +}; + class NGraphVariableOp : public OpKernel { public: explicit NGraphVariableOp(OpKernelConstruction* context); @@ -51,6 +74,8 @@ class NGraphVariableOp : public OpKernel { bool just_looking_; NGraphFreshnessTracker* tracker_; DataType dtype_; + int ng_graph_id_; + string ng_backend_name_; mutex init_mu_; ContainerInfo cinfo_ GUARDED_BY(init_mu_); @@ -74,6 +99,9 @@ NGraphVariableOp::NGraphVariableOp(OpKernelConstruction* context) OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_)); OP_REQUIRES_OK(context, context->GetAttr("just_looking", &just_looking_)); + OP_REQUIRES_OK(context, context->GetAttr("ngraph_graph_id", &ng_graph_id_)); + OP_REQUIRES_OK(context, + context->GetAttr("_ngraph_backend", &ng_backend_name_)); NGRAPH_VLOG(5) << def().name() << ": just looking? 
" << just_looking_; } @@ -93,7 +121,7 @@ void NGraphVariableOp::Compute(OpKernelContext* ctx) { initialized_ = true; } auto creator = [this](NGraphVar** var) { - *var = new NGraphVar(dtype_, shape_); + *var = new NGraphVar(dtype_, shape_, ng_backend_name_); //(*var)->tensor()->set_shape(shape_); return Status::OK(); }; From c99202764613534225fef1e9c61d8902cae5aebc Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 16:43:23 -0800 Subject: [PATCH 56/67] added traces --- .../enable_variable_ops/ngraph_assign_op.cc | 2 +- .../ngraph_tracked_variable.cc | 3 ++- .../ngraph_variable_update_ng_tensor_op.cc | 1 + ngraph_bridge/ngraph_encapsulate_op.cc | 2 +- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 17 +++++++++++++++-- ngraph_bridge/ngraph_tracked_variable.cc | 3 ++- 6 files changed, 22 insertions(+), 6 deletions(-) diff --git a/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc b/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc index 8eb1d5336..35099bbc7 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_assign_op.cc @@ -83,7 +83,7 @@ class NGraphAssignOp : public OpKernel { void Compute(OpKernelContext* context) override { std::ostringstream oss; - oss << "Execute: Assign_" << my_instance_id << ": " << name(); + oss << "NGAssign::Compute::" << name(); ngraph::Event event_compute(oss.str(), name(), ""); NGRAPH_VLOG(4) << "NGraphAssign:: Compute called for: " << def().name() diff --git a/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc b/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc index f821cadbc..8b5b81f68 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_tracked_variable.cc @@ -119,7 +119,7 @@ void NGraphVariableOp::Compute(OpKernelContext* ctx) { << " ,backend_name " << ng_backend_name_; std::ostringstream oss; - oss << "NGraphVariable: " << my_instance_id << ": " << name(); + 
oss << "NGVariable::Compute::" << name(); ngraph::Event event_compute(oss.str(), name(), ""); bool log_copies = false; @@ -250,6 +250,7 @@ void NGraphVariableOp::Compute(OpKernelContext* ctx) { ctx->record_persistent_memory_allocation(var->tensor()->AllocatedBytes()); } var->Unref(); + event_compute.Stop(); ngraph::Event::write_trace(event_compute); } diff --git a/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc b/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc index faee2334d..8755f6f76 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_variable_update_ng_tensor_op.cc @@ -67,6 +67,7 @@ NGraphVariableUpdateNGTensorOp::~NGraphVariableUpdateNGTensorOp() { void NGraphVariableUpdateNGTensorOp::Compute(OpKernelContext* context) { std::ostringstream oss; // Start event tracing + oss << "NGVariableUpdateNGTensor::Compute::" << name(); ngraph::Event event_compute(oss.str(), name(), ""); bool log_copies = false; OP_REQUIRES_OK(context, diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 12141bf04..2ce5f46fc 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -397,7 +397,7 @@ NGraphEncapsulateOp::~NGraphEncapsulateOp() { // OpKernel::Compute //--------------------------------------------------------------------------- void NGraphEncapsulateOp::Compute(OpKernelContext* ctx) { - ngraph::Event event_compute("Compute", "", ""); + ngraph::Event event_compute("NGEncap::Compute::" + name(), name(), ""); if (m_use_parallel_executor) { NGRAPH_VLOG(1) << "NGraphEncapsulateOp::Compute: Using Parallel Executor"; diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index 57ee713a5..a2fe10a56 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -159,18 
+159,21 @@ Status GetPipelinedIOTensorsReadyForExecution( // Allocate the input/ ngraph::Event event_copy_input_tensor("Copy Pipelined Input Tensors", "", ""); - + std::vector> input_write_events; if (!skip_tf2ng_copy) { // All pipelined inputs are copied for (auto i = 0; i < pipelined_input_indexes.size(); i++) { int tf_index = pipelined_input_indexes[i]; - ng::element::Type ng_element_type; TF_RETURN_IF_ERROR(TFDataTypeToNGraphElementType( tf_input_tensors[tf_index].dtype(), &ng_element_type)); void* current_src_ptr = (void*)DMAHelper::base(&tf_input_tensors[tf_index]); + + std::unique_ptr event_copy_h2d( + new ngraph::Event("H2D_Input_" + std::to_string(tf_index), "", "")); + try { ng_pipelined_inputs[i]->write( current_src_ptr, ng_pipelined_inputs[i]->get_element_count() * @@ -181,6 +184,8 @@ Status GetPipelinedIOTensorsReadyForExecution( } catch (...) { return errors::Internal("Error copying TF tensor to device tensor"); } + event_copy_h2d->Stop(); + input_write_events.push_back(std::move(event_copy_h2d)); } } else { // All pipelined inputs that are not prefetched are copied @@ -204,6 +209,8 @@ Status GetPipelinedIOTensorsReadyForExecution( tf_input_tensors[tf_index].dtype(), &ng_element_type)); void* current_src_ptr = (void*)DMAHelper::base(&tf_input_tensors[tf_index]); + std::unique_ptr event_copy_h2d( + new ngraph::Event("H2D_Input_" + std::to_string(tf_index), "", "")); try { ng_pipelined_inputs[ng_index]->write( current_src_ptr, @@ -215,8 +222,14 @@ Status GetPipelinedIOTensorsReadyForExecution( } catch (...) 
{ return errors::Internal("Error copying TF tensor to device tensor"); } + event_copy_h2d->Stop(); + input_write_events.push_back(std::move(event_copy_h2d)); } } + + for (auto& next : input_write_events) { + ngraph::Event::write_trace(*next.get()); + } event_copy_input_tensor.Stop(); ngraph::Event::write_trace(event_copy_input_tensor); diff --git a/ngraph_bridge/ngraph_tracked_variable.cc b/ngraph_bridge/ngraph_tracked_variable.cc index b8dcf9325..6a55d96a3 100644 --- a/ngraph_bridge/ngraph_tracked_variable.cc +++ b/ngraph_bridge/ngraph_tracked_variable.cc @@ -112,7 +112,7 @@ NGraphVariableOp::~NGraphVariableOp() { tracker_->Unref(); } void NGraphVariableOp::Compute(OpKernelContext* ctx) { mutex_lock l(init_mu_); std::ostringstream oss; - oss << "NGraphVariable: " << my_instance_id << ": " << name(); + oss << "NGVariable::Compute::" << name(); ngraph::Event event_compute(oss.str(), name(), ""); if (!initialized_) { @@ -186,6 +186,7 @@ void NGraphVariableOp::Compute(OpKernelContext* ctx) { ctx->record_persistent_memory_allocation(var->tensor()->AllocatedBytes()); } var->Unref(); + event_compute.Stop(); ngraph::Event::write_trace(event_compute); } From 5ae567a251ac14c96b996557b367db34c2a83bb7 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 16:54:34 -0800 Subject: [PATCH 57/67] fix build --- ngraph_bridge/ngraph_tracked_variable.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/ngraph_bridge/ngraph_tracked_variable.cc b/ngraph_bridge/ngraph_tracked_variable.cc index 6a55d96a3..22b1e584e 100644 --- a/ngraph_bridge/ngraph_tracked_variable.cc +++ b/ngraph_bridge/ngraph_tracked_variable.cc @@ -74,8 +74,6 @@ class NGraphVariableOp : public OpKernel { bool just_looking_; NGraphFreshnessTracker* tracker_; DataType dtype_; - int ng_graph_id_; - string ng_backend_name_; mutex init_mu_; ContainerInfo cinfo_ GUARDED_BY(init_mu_); @@ -99,9 +97,6 @@ NGraphVariableOp::NGraphVariableOp(OpKernelConstruction* context) 
OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_)); OP_REQUIRES_OK(context, context->GetAttr("just_looking", &just_looking_)); - OP_REQUIRES_OK(context, context->GetAttr("ngraph_graph_id", &ng_graph_id_)); - OP_REQUIRES_OK(context, - context->GetAttr("_ngraph_backend", &ng_backend_name_)); NGRAPH_VLOG(5) << def().name() << ": just looking? " << just_looking_; } @@ -121,7 +116,7 @@ void NGraphVariableOp::Compute(OpKernelContext* ctx) { initialized_ = true; } auto creator = [this](NGraphVar** var) { - *var = new NGraphVar(dtype_, shape_, ng_backend_name_); + *var = new NGraphVar(dtype_, shape_); //(*var)->tensor()->set_shape(shape_); return Status::OK(); }; From ab33837c1c573e3a8ea2f630561783e6c57429fe Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 19:28:59 -0800 Subject: [PATCH 58/67] Var Rewrite pass calls EnterPrefetchInCatalog, fixed header guard, tensor manager print utility --- .../ngraph_rewrite_pass.cc | 8 ++++ .../ngraph_enter_prefetch_in_catalog.h | 4 +- ngraph_bridge/ngraph_tensor_manager.cc | 42 +++++++++++++++++++ ngraph_bridge/ngraph_tensor_manager.h | 2 + 4 files changed, 54 insertions(+), 2 deletions(-) diff --git a/ngraph_bridge/enable_variable_ops/ngraph_rewrite_pass.cc b/ngraph_bridge/enable_variable_ops/ngraph_rewrite_pass.cc index b764713ab..ea97ff417 100644 --- a/ngraph_bridge/enable_variable_ops/ngraph_rewrite_pass.cc +++ b/ngraph_bridge/enable_variable_ops/ngraph_rewrite_pass.cc @@ -30,6 +30,7 @@ #include "ngraph_bridge/ngraph_cluster_manager.h" #include "ngraph_bridge/ngraph_deassign_clusters.h" #include "ngraph_bridge/ngraph_encapsulate_clusters.h" +#include "ngraph_bridge/ngraph_enter_prefetch_in_catalog.h" #include "ngraph_bridge/ngraph_mark_for_clustering.h" #include "ngraph_bridge/ngraph_rewrite_for_tracking.h" #include "ngraph_bridge/ngraph_utils.h" @@ -255,6 +256,13 @@ class NGraphEncapsulationPass : public NGraphRewritePass { "Graph with NGraphAssigns Optimized/Removed"); } + // 8.
Enter Prefetch in catalog then. + TF_RETURN_IF_ERROR(EnterPrefetchInCatalog(options.graph->get(), idx)); + if (DumpCatalogedGraphs()) { + DumpGraphs(options, idx, "prefetch-cataloged", + "Graph with Prefetched Inputs Entered in Catalog"); + } + return Status::OK(); } diff --git a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.h b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.h index d7ab8cc9c..534166aa1 100644 --- a/ngraph_bridge/ngraph_enter_prefetch_in_catalog.h +++ b/ngraph_bridge/ngraph_enter_prefetch_in_catalog.h @@ -13,8 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ -#ifndef NGRAPH_TF_ENTER_IN_CATALOG_H_ -#define NGRAPH_TF_ENTER_IN_CATALOG_H_ +#ifndef NGRAPH_TF_ENTER_PREFETCH_IN_CATALOG_H_ +#define NGRAPH_TF_ENTER_PREFETCH_IN_CATALOG_H_ #pragma once #include "tensorflow/core/graph/graph.h" diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 84ad7be90..2fbcfa4c9 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -148,6 +148,48 @@ void NGraphTensorManager::Initialize() { m_pipelined_input_indexes_that_are_prefetched); m_pipelined_not_prefetched_input_indexes = FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes); + Print(); +} + +//--------------------------------------------------------------------------- +// PrintVector Utility +//--------------------------------------------------------------------------- +void PrintVector(const vector& input_vector, const string title) { + cout << title << endl; + std::stringstream ss; + for (int val : input_vector) { + ss << val << " "; + } + cout << ss.str() << endl; +} + +//--------------------------------------------------------------------------- +// NGraphTensorManager::Print +//--------------------------------------------------------------------------- +void 
NGraphTensorManager::Print() { + cout << "** NGEncapsulate TensorManager:" << m_ng_encap_node_name << " **" + << endl; + + cout << "** Variables Related **" << endl; + PrintVector(m_input_indexes_from_variables, "Input Indexes from Variables"); + PrintVector(m_output_indexes_assigning_variable, + "Output Indexes Referring to Variables"); + PrintVector(m_output_indexes_that_need_copy, "Output Indexes to be Read"); + + cout << "** Pipelined **" << endl; + PrintVector(m_pipelined_input_indexes, "Pipelined Input Indexes"); + PrintVector(m_pipelined_output_indexes, "Pipelined Output Indexes"); + + cout << "** Prefetched **" << endl; + PrintVector(m_prefetched_input_indexes, "Prefetched Input Indexes"); + PrintVector(m_pipelined_not_prefetched_input_indexes, + "Pipelined But Not Prefetched Input Indexes"); + + cout << "** Prefetched wrt pipelined indexes **" << endl; + PrintVector(m_pipelined_input_indexes_that_are_prefetched, + "Prefetched Input Indexes wrt Pipelined Inputs"); + PrintVector(m_pipelined_input_indexes_that_are_not_prefetched, + "Not Prefetched Input Indexes wrt Pipelined Inputs"); } //--------------------------------------------------------------------------- diff --git a/ngraph_bridge/ngraph_tensor_manager.h b/ngraph_bridge/ngraph_tensor_manager.h index 9143241fb..73f2ca9d4 100644 --- a/ngraph_bridge/ngraph_tensor_manager.h +++ b/ngraph_bridge/ngraph_tensor_manager.h @@ -109,6 +109,8 @@ class NGraphTensorManager { Status GetOutputVariableCopyToTF(const int& output_index, bool* output_var_copy_to_tf); + void Print(); + private: void Initialize(); string m_ng_encap_node_name; From f8ae9372295091f7514967307e30f508ce16ace9 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Wed, 18 Dec 2019 19:36:41 -0800 Subject: [PATCH 59/67] small fix --- ngraph_bridge/ngraph_tensor_manager.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 2fbcfa4c9..9cd03d574 100644 --- 
a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -148,7 +148,6 @@ void NGraphTensorManager::Initialize() { m_pipelined_input_indexes_that_are_prefetched); m_pipelined_not_prefetched_input_indexes = FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes); - Print(); } //--------------------------------------------------------------------------- From 85f9a390a4dd80c7c1d691d210957ee6614afd63 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 19 Dec 2019 13:37:55 -0800 Subject: [PATCH 60/67] incorporate review comments --- ngraph_bridge/ngraph_tensor_manager.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 9cd03d574..5de0e2e32 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -88,7 +88,7 @@ void NGraphTensorManager::Initialize() { // For graphs that were run through AOT // Graph rewrite is not done, and there is no entry in catalog - // If there is not entry in catalog all outputs need to be copied + // If there is no entry in catalog all outputs need to be copied if (!NGraphCatalog::EncapOutputNeedsCopy(m_ng_encap_graph_id, m_ng_encap_node_name)) { m_output_indexes_that_need_copy.resize(m_number_of_outputs); @@ -155,11 +155,7 @@ void NGraphTensorManager::Initialize() { //--------------------------------------------------------------------------- void PrintVector(const vector& input_vector, const string title) { cout << title << endl; - std::stringstream ss; - for (int val : input_vector) { - ss << val << " "; - } - cout << ss.str() << endl; + cout << ng::join(input_vector) << endl; } //--------------------------------------------------------------------------- From a5f9ff28460e9fc0f45092266b5ade3ea1c56b4c Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 19 Dec 2019 13:38:51 -0800 Subject: [PATCH 61/67] fixed path for axpy pipelined for 
test_ngtf.py --- tools/test_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/test_utils.py b/tools/test_utils.py index 12f2ead29..4c84a6d79 100755 --- a/tools/test_utils.py +++ b/tools/test_utils.py @@ -108,7 +108,7 @@ def run_ngtf_pytests(venv_dir, build_dir): build_dir = os.path.abspath(build_dir) venv_dir = os.path.abspath(venv_dir) mnist_dir = os.path.abspath(build_dir + '/examples/mnist/') - + axpy_dir = os.path.abspath(build_dir + '/examples/') test_dir = os.path.join(build_dir, "test") test_dir = os.path.join(test_dir, "python") @@ -130,7 +130,7 @@ def run_ngtf_pytests(venv_dir, build_dir): build_dir) + " --ignore=" + build_dir + "/test/python/bfloat16" env = os.environ.copy() new_paths = venv_dir + '/bin/python3:' + os.path.abspath( - build_dir) + ":" + os.path.abspath(mnist_dir) + build_dir) + ":" + os.path.abspath(mnist_dir) + os.path.abspath(axpy_dir) if 'PYTHONPATH' in env: env["PYTHONPATH"] = new_paths + ":" + env["PYTHONPATH"] else: From b639f9dd7a856a343cd237740b1d52c9b68cf1d0 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 19 Dec 2019 13:55:00 -0800 Subject: [PATCH 62/67] Added more specific tracing for prefetched --- ngraph_bridge/ngraph_encapsulate_op.cc | 4 +++- ngraph_bridge/ngraph_encapsulate_op_utils.cc | 12 ++++++------ ngraph_bridge/ngraph_prefetch_dataset_op.cc | 12 +++++++++--- tools/test_utils.py | 3 ++- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 2ce5f46fc..5acd4eb5d 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -504,7 +504,9 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { ngraph::Event::write_trace(event_prepare_ng_tensors); // And execute - ngraph::Event event_execute_graph("Execute Graph", "", ""); + ngraph::Event event_execute_graph( + "Execute Graph Pipeline Indx" + 
to_string(current_iter_pipeline_depth), + "", ""); BackendManager::LockBackend(m_parallel_executor->GetOpBackendName()); NGRAPH_VLOG(4) << "NGraphEncapsulateOp::Compute call starting for cluster " diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.cc b/ngraph_bridge/ngraph_encapsulate_op_utils.cc index a2fe10a56..d12494e45 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.cc +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.cc @@ -33,7 +33,7 @@ Status GetPipelinedIOTensorsReadyForExecution( OpKernelContext* ctx, const vector& tf_input_tensors, const shared_ptr& pipelined_tensor_store, const shared_ptr& tensor_manager, - std::tuple& + tuple& pipelined_io_tensors) { auto io_tensors = pipelined_tensor_store->get_tensors(); @@ -89,7 +89,7 @@ Status GetPipelinedIOTensorsReadyForExecution( tensor_manager->GetInputIndexesForPrefetchSharedObject()); // Get the set of IO tensors for the next iteration - std::tuple + tuple io_tensors_next_iter; io_tensors_next_iter = pipelined_tensor_store->get_tensors(); @@ -209,21 +209,21 @@ Status GetPipelinedIOTensorsReadyForExecution( tf_input_tensors[tf_index].dtype(), &ng_element_type)); void* current_src_ptr = (void*)DMAHelper::base(&tf_input_tensors[tf_index]); - std::unique_ptr event_copy_h2d( - new ngraph::Event("H2D_Input_" + std::to_string(tf_index), "", "")); + unique_ptr event_copy_h2d( + new ngraph::Event("H2D_Input_" + to_string(tf_index), "", "")); try { ng_pipelined_inputs[ng_index]->write( current_src_ptr, ng_pipelined_inputs[ng_index]->get_element_count() * ng_element_type.size()); - } catch (const std::exception& exp) { + } catch (const exception& exp) { return errors::Internal("Error copying TF tensor to device tensor: ", exp.what()); } catch (...) 
{ return errors::Internal("Error copying TF tensor to device tensor"); } event_copy_h2d->Stop(); - input_write_events.push_back(std::move(event_copy_h2d)); + input_write_events.push_back(move(event_copy_h2d)); } } diff --git a/ngraph_bridge/ngraph_prefetch_dataset_op.cc b/ngraph_bridge/ngraph_prefetch_dataset_op.cc index 18b946191..5435bfccb 100644 --- a/ngraph_bridge/ngraph_prefetch_dataset_op.cc +++ b/ngraph_bridge/ngraph_prefetch_dataset_op.cc @@ -415,14 +415,15 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { ngraph_bridge::NGraphPrefetchSharedResouce::RESOURCE_NAME, &shared_data); if (s.ok()) { - ngraph::Event evt_dev_cp("Prf Dev Copy", "Copy", ""); shared_data->SetBufferDepth(m_buffer_size); auto ng_input_tensor_bundle = shared_data->GetNextIOTensorBundleForDeviceTransfer(); auto ng_prefetch_input_indexes_map = shared_data->GetPrefetchInputIndexesMap(); - + ngraph::Event evt_dev_cp( + "Prf Dev Copy: Pipe_Ind_" + to_string(ng_input_tensor_bundle.Id), + "Copy", ""); int number_of_buffer_elements = buffer_element.value.size(); if (number_of_buffer_elements != ng_prefetch_input_indexes_map.size()) { @@ -433,7 +434,8 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { "encap " + to_string(ng_prefetch_input_indexes_map.size())); } - + std::vector> + prefetch_input_write_events; // Write to these tensors for (auto itr : ng_prefetch_input_indexes_map) { int ng_index = itr.first; @@ -445,6 +447,8 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { void* current_src_ptr = (void*)DMAHelper::base(&buffer_element.value[tf_index]); + std::unique_ptr event_copy_h2d(new ngraph::Event( + "H2D_PrefetchInput_" + std::to_string(tf_index), "", "")); try { NGRAPH_VLOG(2) << "[PREFETCH] INPUT tensor being written by Prefetch: " @@ -459,6 +463,8 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { throw std::runtime_error( "Error copying TF tensor to device tensor"); } + event_copy_h2d->Stop(); + 
prefetch_input_write_events.push_back(std::move(event_copy_h2d)); } // Now add them back to the other queue diff --git a/tools/test_utils.py b/tools/test_utils.py index 4c84a6d79..427dda0a9 100755 --- a/tools/test_utils.py +++ b/tools/test_utils.py @@ -130,7 +130,8 @@ def run_ngtf_pytests(venv_dir, build_dir): build_dir) + " --ignore=" + build_dir + "/test/python/bfloat16" env = os.environ.copy() new_paths = venv_dir + '/bin/python3:' + os.path.abspath( - build_dir) + ":" + os.path.abspath(mnist_dir) + os.path.abspath(axpy_dir) + build_dir) + ":" + os.path.abspath(mnist_dir) + os.path.abspath( + axpy_dir) if 'PYTHONPATH' in env: env["PYTHONPATH"] = new_paths + ":" + env["PYTHONPATH"] else: From 0c40739989d1965dcba5d1799bbc0a13de80cc6f Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 19 Dec 2019 16:05:54 -0800 Subject: [PATCH 63/67] incorporate review comments --- ngraph_bridge/ngraph_encapsulate_op.cc | 2 +- tools/test_utils.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op.cc b/ngraph_bridge/ngraph_encapsulate_op.cc index 5acd4eb5d..4605757ae 100644 --- a/ngraph_bridge/ngraph_encapsulate_op.cc +++ b/ngraph_bridge/ngraph_encapsulate_op.cc @@ -496,7 +496,7 @@ void NGraphEncapsulateOp::ComputeUsingParallelExecutor(OpKernelContext* ctx) { vector> ng_outputs(num_of_outputs); // Prepare NG Input Output Tensors - // Retrofit Variable tensors and pipelined tensors to ng_input and ng_outputs + // Assemble Variable tensors and pipelined tensors to ng_input and ng_outputs OP_REQUIRES_OK(ctx, GetIOTensorsReadyForExecution( ctx, tensor_manager, get<1>(pipelined_io_tensors), get<2>(pipelined_io_tensors), ng_inputs, ng_outputs)); diff --git a/tools/test_utils.py b/tools/test_utils.py index 427dda0a9..207097567 100755 --- a/tools/test_utils.py +++ b/tools/test_utils.py @@ -130,8 +130,8 @@ def run_ngtf_pytests(venv_dir, build_dir): build_dir) + " --ignore=" + build_dir + "/test/python/bfloat16" env = 
os.environ.copy() new_paths = venv_dir + '/bin/python3:' + os.path.abspath( - build_dir) + ":" + os.path.abspath(mnist_dir) + os.path.abspath( - axpy_dir) + build_dir) + ":" + os.path.abspath(axpy_dir) + os.path.abspath( + mnist_dir) if 'PYTHONPATH' in env: env["PYTHONPATH"] = new_paths + ":" + env["PYTHONPATH"] else: From 4b21cc1e65a0e67664a9aa5d26343952427c9673 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 19 Dec 2019 16:14:15 -0800 Subject: [PATCH 64/67] minor --- ngraph_bridge/ngraph_encapsulate_op_utils.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ngraph_bridge/ngraph_encapsulate_op_utils.h b/ngraph_bridge/ngraph_encapsulate_op_utils.h index 631171b4a..1a6df4ede 100644 --- a/ngraph_bridge/ngraph_encapsulate_op_utils.h +++ b/ngraph_bridge/ngraph_encapsulate_op_utils.h @@ -52,8 +52,9 @@ Status GetPipelinedIOTensorsReadyForExecution( tuple& pipelined_io_tensors); -// Assembles the different types of input and output tensors into a right order -// Retrofit Variable tensors and pipelined tensors to ng_input and ng_outputs +// Assembles the different types of input and output tensors +// Variable tensors and pipelined tensors are put together in the right order +// into ng_inputs and ng_outputs // 1. For input indexes that are fed by variables, get the variable tensors from // context // 2. For output indexes that are updating variables, get the variable tensors From 36f4bec05a3748681ab6186082aca216ea7d53ba Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 19 Dec 2019 16:49:10 -0800 Subject: [PATCH 65/67] removed print vector. 
added lambda --- ngraph_bridge/ngraph_tensor_manager.cc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ngraph_bridge/ngraph_tensor_manager.cc b/ngraph_bridge/ngraph_tensor_manager.cc index 5de0e2e32..116c213ec 100644 --- a/ngraph_bridge/ngraph_tensor_manager.cc +++ b/ngraph_bridge/ngraph_tensor_manager.cc @@ -150,18 +150,15 @@ void NGraphTensorManager::Initialize() { FindComplement(m_pipelined_input_indexes, m_prefetched_input_indexes); } -//--------------------------------------------------------------------------- -// PrintVector Utility -//--------------------------------------------------------------------------- -void PrintVector(const vector& input_vector, const string title) { - cout << title << endl; - cout << ng::join(input_vector) << endl; -} - //--------------------------------------------------------------------------- // NGraphTensorManager::Print //--------------------------------------------------------------------------- void NGraphTensorManager::Print() { + auto PrintVector = [](const vector& input_vector, const string title) { + cout << title << endl; + cout << ng::join(input_vector) << endl; + }; + cout << "** NGEncapsulate TensorManager:" << m_ng_encap_node_name << " **" << endl; From bf3b84664a35ec43aa5b5e5d52ccffbbee937728 Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 19 Dec 2019 17:00:18 -0800 Subject: [PATCH 66/67] fix test_utils.py --- tools/test_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/test_utils.py b/tools/test_utils.py index 207097567..e8f5752d3 100755 --- a/tools/test_utils.py +++ b/tools/test_utils.py @@ -130,7 +130,7 @@ def run_ngtf_pytests(venv_dir, build_dir): build_dir) + " --ignore=" + build_dir + "/test/python/bfloat16" env = os.environ.copy() new_paths = venv_dir + '/bin/python3:' + os.path.abspath( - build_dir) + ":" + os.path.abspath(axpy_dir) + os.path.abspath( + build_dir) + ":" + os.path.abspath(axpy_dir) + ":" + os.path.abspath( mnist_dir) 
if 'PYTHONPATH' in env: env["PYTHONPATH"] = new_paths + ":" + env["PYTHONPATH"] From 8e346e8122f888155595c51f31ac529e443efbaf Mon Sep 17 00:00:00 2001 From: Shrestha Malik Date: Thu, 19 Dec 2019 17:46:18 -0800 Subject: [PATCH 67/67] write prefetch traces --- ngraph_bridge/ngraph_prefetch_dataset_op.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ngraph_bridge/ngraph_prefetch_dataset_op.cc b/ngraph_bridge/ngraph_prefetch_dataset_op.cc index 5435bfccb..7c131bcce 100644 --- a/ngraph_bridge/ngraph_prefetch_dataset_op.cc +++ b/ngraph_bridge/ngraph_prefetch_dataset_op.cc @@ -448,7 +448,7 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { void* current_src_ptr = (void*)DMAHelper::base(&buffer_element.value[tf_index]); std::unique_ptr event_copy_h2d(new ngraph::Event( - "H2D_PrefetchInput_" + std::to_string(tf_index), "", "")); + "H2D_PrefetchInput_" + std::to_string(tf_index), "Copy", "")); try { NGRAPH_VLOG(2) << "[PREFETCH] INPUT tensor being written by Prefetch: " @@ -467,6 +467,10 @@ class NGraphPrefetchDatasetOp::Dataset : public DatasetBase { prefetch_input_write_events.push_back(std::move(event_copy_h2d)); } + for (auto& next : prefetch_input_write_events) { + ngraph::Event::write_trace(*next.get()); + } + // Now add them back to the other queue shared_data->AddNextIOTensorBundleReadyForDeviceExecution( ng_input_tensor_bundle);