From 819c9119c1ac9cee5fcb4e342bebe2e564ebb5c1 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Tue, 26 Apr 2022 20:28:17 -0700 Subject: [PATCH 1/4] fix(//core/runtime): Support more delimiter variants Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- core/runtime/TRTEngine.cpp | 40 +++++++++++++++++++++++++++++++++++++- core/runtime/runtime.h | 2 ++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp index d46fcdfee5..5ba3a56830 100644 --- a/core/runtime/TRTEngine.cpp +++ b/core/runtime/TRTEngine.cpp @@ -53,13 +53,20 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe TORCHTRT_CHECK((cuda_engine.get() != nullptr), "Unable to deserialize the TensorRT engine"); exec_ctx = make_trt(cuda_engine->createExecutionContext()); + TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to create TensorRT execution context"); uint64_t inputs = 0; uint64_t outputs = 0; for (int64_t x = 0; x < cuda_engine->getNbBindings(); x++) { std::string bind_name = cuda_engine->getBindingName(x); - std::string idx_s = bind_name.substr(bind_name.find("_") + 1); + auto delim = bind_name.find("."); + if (delim == std::string::npos) { + delim = bind_name.find("_"); + TORCHTRT_CHECK(delim != std::string::npos, "Unable to determine binding index for input " << bind_name << "\nEnsure module was compile with Torch-TensorRT.ts"); + } + + std::string idx_s = bind_name.substr(delim + 1); uint64_t idx = static_cast(std::stoi(idx_s)); if (cuda_engine->bindingIsInput(x)) { @@ -71,6 +78,8 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe } } num_io = std::make_pair(inputs, outputs); + + LOG_DEBUG(*this); } TRTEngine& TRTEngine::operator=(const TRTEngine& other) { @@ -82,6 +91,34 @@ TRTEngine& TRTEngine::operator=(const TRTEngine& other) { return (*this); } +std::string TRTEngine::to_str() const { + std::stringstream ss; + ss << "Torch-TensorRT TensorRT Engine:" << std::endl; + ss << " Name: " << name << std::endl; + ss << " Inputs: [" << std::endl; + for (uint64_t i = 0; i < num_io.first; i++) { + ss << " id: " << i << std::endl; + ss << " shape: " << exec_ctx->getBindingDimensions(i) << std::endl; + ss << " dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(i)) << std::endl; + } + ss << " ]" << std::endl; + ss << " Outputs: [" << std::endl; + for (uint64_t o = 0; o < num_io.second; o++) { + ss << " id: " << o << std::endl; + ss << " shape: " << exec_ctx->getBindingDimensions(o) << std::endl; + ss << " dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl; + } + ss << " ]" << std::endl; + ss << " Device: " << device_info << std::endl; + + return ss.str(); +} + +std::ostream& operator<<(std::ostream& os, const TRTEngine& engine) { + os << engine.to_str(); + return os; +} + // TODO: Implement a call method // c10::List TRTEngine::Run(c10::List inputs) { // auto input_vec = inputs.vec(); @@ -96,6 +133,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion = .def(torch::init>()) // TODO: .def("__call__", &TRTEngine::Run) // TODO: .def("run", &TRTEngine::Run) + .def("__str__", &TRTEngine::to_str) .def_pickle( [](const c10::intrusive_ptr& self) -> std::vector { // Serialize TensorRT engine diff --git a/core/runtime/runtime.h b/core/runtime/runtime.h index 79ae74c91b..2d92fa4e00 100644 --- a/core/runtime/runtime.h +++ b/core/runtime/runtime.h @@ -59,6 +59,8 @@ struct TRTEngine : torch::CustomClassHolder { TRTEngine(std::vector serialized_info); TRTEngine(std::string mod_name, std::string serialized_engine, CudaDevice cuda_device); TRTEngine& operator=(const TRTEngine& other); + std::string to_str() const; + friend std::ostream& operator<<(std::ostream& os, const TRTEngine& engine); // TODO: Implement a call method // c10::List Run(c10::List inputs); }; From 1c3c779414a739963f7b470cd520ff3a2e92a4b5 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Tue, 26 Apr 2022 20:37:39 -0700 Subject: [PATCH 2/4] docs: update docs with binding name restrictions Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- cpp/include/torch_tensorrt/torch_tensorrt.h | 7 ++++++- py/torch_tensorrt/ts/_compiler.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index ace05d33f5..eb11ad20cf 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -739,7 +739,12 @@ TORCHTRT_API std::string convert_method_to_trt_engine( * module. Registers execution of the engine as the forward method of the module * Forward is defined as: forward(Tensor[]) -> Tensor[] * - * @return: A new module trageting a TensorRT engine + * TensorRT bindings must have names with the following format: + * - [symbol].[index in input / output array] + * ex. + * - [x.0, x.1, x.2] -> [y.0] + * + * @return: A new module targeting a TensorRT engine */ TORCHTRT_API torch::jit::Module embed_engine_in_new_module(const std::string& engine, Device device); } // namespace torchscript diff --git a/py/torch_tensorrt/ts/_compiler.py b/py/torch_tensorrt/ts/_compiler.py index b895bf54b8..c0e88b99ce 100644 --- a/py/torch_tensorrt/ts/_compiler.py +++ b/py/torch_tensorrt/ts/_compiler.py @@ -207,6 +207,11 @@ def embed_engine_in_new_module(serialized_engine: bytes, device=Device._current_ forward(Tensor[]) -> Tensor[] + TensorRT bindings must have names with the following format: + - [symbol].[index in input / output array] + ex. + - [x.0, x.1, x.2] -> [y.0] + Module can be save with engine embedded with torch.jit.save and moved / loaded according to torch_tensorrt portability rules Arguments: From 49d367da7b49fafd122d551c5a305d77d811d422 Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Thu, 5 May 2022 16:28:37 -0700 Subject: [PATCH 3/4] refactor: apply linting Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- core/runtime/TRTEngine.cpp | 17 ++++++++++------- tests/util/run_graph_engine.cpp | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp index 5ba3a56830..70b51e0812 100644 --- a/core/runtime/TRTEngine.cpp +++ b/core/runtime/TRTEngine.cpp @@ -63,7 +63,10 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe auto delim = bind_name.find("."); if (delim == std::string::npos) { delim = bind_name.find("_"); - TORCHTRT_CHECK(delim != std::string::npos, "Unable to determine binding index for input " << bind_name << "\nEnsure module was compile with Torch-TensorRT.ts"); + TORCHTRT_CHECK( + delim != std::string::npos, + "Unable to determine binding index for input " << bind_name + << "\nEnsure module was compile with Torch-TensorRT.ts"); } std::string idx_s = bind_name.substr(delim + 1); @@ -97,16 +100,16 @@ std::string TRTEngine::to_str() const { ss << " Name: " << name << std::endl; ss << " Inputs: [" << std::endl; for (uint64_t i = 0; i < num_io.first; i++) { - ss << " id: " << i << std::endl; - ss << " shape: " << exec_ctx->getBindingDimensions(i) << std::endl; - ss << " dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(i)) << std::endl; + ss << " id: " << i << std::endl; + ss << " shape: " << exec_ctx->getBindingDimensions(i) << std::endl; + ss << " dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(i)) << std::endl; } ss << " ]" << std::endl; ss << " Outputs: [" << std::endl; for (uint64_t o = 0; o < num_io.second; o++) { - ss << " id: " << o << std::endl; - ss << " shape: " << exec_ctx->getBindingDimensions(o) << std::endl; - ss << " dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl; + ss << " id: " << o << std::endl; + ss << " shape: " << exec_ctx->getBindingDimensions(o) << std::endl; + ss << " dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl; } ss << " ]" << std::endl; ss << " Device: " << device_info << std::endl; diff --git a/tests/util/run_graph_engine.cpp b/tests/util/run_graph_engine.cpp index df52b54b26..fe211f2baf 100644 --- a/tests/util/run_graph_engine.cpp +++ b/tests/util/run_graph_engine.cpp @@ -21,7 +21,7 @@ std::vector toInputs(std::vector ten) { for (auto i : ten) { a.push_back(core::ir::Input(core::util::toVec(i.sizes()))); } - return std::move(a); + return a; } std::vector toInputsDynamic(std::vector ten, bool dynamic_batch) { @@ -49,7 +49,7 @@ std::vector toInputsDynamic(std::vector ten, bool d } } - return std::move(a); + return a; } std::vector RunEngine(std::string& eng, std::vector inputs) { From 65af9d1b7ad2e1ac11eda5206279a8c666bb52ec Mon Sep 17 00:00:00 2001 From: Naren Dasan Date: Fri, 6 May 2022 18:29:55 -0700 Subject: [PATCH 4/4] refactor(//core/runtime): Updating the logging for runtime deserialization NOTE: This does not fully address the deserialization issue as the root cause is TensorRT modifies the input binding names which is leading to these cases of stoi errors. Signed-off-by: Naren Dasan Signed-off-by: Naren Dasan --- core/ir/ir.cpp | 2 +- core/runtime/TRTEngine.cpp | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp index 1c1813ea5f..fcca3df33c 100644 --- a/core/ir/ir.cpp +++ b/core/ir/ir.cpp @@ -21,7 +21,7 @@ InputSpecMap pair_input_vals_with_specs(std::vector va std::unordered_map a; for (size_t i = 0; i < vals.size(); i++) { - LOG_DEBUG("Paring " << i << ": " << vals[i]->debugName() << " : " << specs[i]); + LOG_DEBUG("Pairing " << i << ": " << vals[i]->debugName() << " : " << specs[i]); a.insert({vals[i], specs[i]}); } return a; diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp index 70b51e0812..26c755cb19 100644 --- a/core/runtime/TRTEngine.cpp +++ b/core/runtime/TRTEngine.cpp @@ -60,13 +60,15 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe for (int64_t x = 0; x < cuda_engine->getNbBindings(); x++) { std::string bind_name = cuda_engine->getBindingName(x); + LOG_DEBUG("Binding name: " << bind_name); auto delim = bind_name.find("."); if (delim == std::string::npos) { delim = bind_name.find("_"); TORCHTRT_CHECK( delim != std::string::npos, - "Unable to determine binding index for input " << bind_name - << "\nEnsure module was compile with Torch-TensorRT.ts"); + "Unable to determine binding index for input " + << bind_name + << "\nEnsure module was compiled with Torch-TensorRT.ts or follows Torch-TensorRT Runtime conventions"); } std::string idx_s = bind_name.substr(delim + 1); @@ -108,8 +110,8 @@ std::string TRTEngine::to_str() const { ss << " Outputs: [" << std::endl; for (uint64_t o = 0; o < num_io.second; o++) { ss << " id: " << o << std::endl; - ss << " shape: " << exec_ctx->getBindingDimensions(o) << std::endl; - ss << " dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl; + ss << " shape: " << exec_ctx->getBindingDimensions(o) << std::endl; + ss << " dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl; } ss << " ]" << std::endl; ss << " Device: " << device_info << std::endl;