From 819c9119c1ac9cee5fcb4e342bebe2e564ebb5c1 Mon Sep 17 00:00:00 2001
From: Naren Dasan <naren@narendasan.com>
Date: Tue, 26 Apr 2022 20:28:17 -0700
Subject: [PATCH 1/4] fix(//core/runtime): Support more delimiter variants

Signed-off-by: Naren Dasan <narens@nvidia.com>
Signed-off-by: Naren Dasan <naren@narendasan.com>
---
 core/runtime/TRTEngine.cpp | 40 +++++++++++++++++++++++++++++++++++++-
 core/runtime/runtime.h     |  2 ++
 2 files changed, 41 insertions(+), 1 deletion(-)
diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
index d46fcdfee5..5ba3a56830 100644
--- a/core/runtime/TRTEngine.cpp
+++ b/core/runtime/TRTEngine.cpp
@@ -53,13 +53,20 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe
   TORCHTRT_CHECK((cuda_engine.get() != nullptr), "Unable to deserialize the TensorRT engine");
 
   exec_ctx = make_trt(cuda_engine->createExecutionContext());
+  TORCHTRT_CHECK((exec_ctx.get() != nullptr), "Unable to create TensorRT execution context");
 
   uint64_t inputs = 0;
   uint64_t outputs = 0;
 
   for (int64_t x = 0; x < cuda_engine->getNbBindings(); x++) {
     std::string bind_name = cuda_engine->getBindingName(x);
-    std::string idx_s = bind_name.substr(bind_name.find("_") + 1);
+    auto delim = bind_name.find(".");
+    if (delim == std::string::npos) {
+      delim = bind_name.find("_");
+      TORCHTRT_CHECK(delim != std::string::npos, "Unable to determine binding index for input " << bind_name << "\nEnsure module was compile with Torch-TensorRT.ts");
+    }
+
+    std::string idx_s = bind_name.substr(delim + 1);
     uint64_t idx = static_cast<uint64_t>(std::stoi(idx_s));
 
     if (cuda_engine->bindingIsInput(x)) {
@@ -71,6 +78,8 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe
     }
   }
   num_io = std::make_pair(inputs, outputs);
+
+  LOG_DEBUG(*this);
 }
 
 TRTEngine& TRTEngine::operator=(const TRTEngine& other) {
@@ -82,6 +91,34 @@ TRTEngine& TRTEngine::operator=(const TRTEngine& other) {
   return (*this);
 }
 
+std::string TRTEngine::to_str() const {
+  std::stringstream ss;
+  ss << "Torch-TensorRT TensorRT Engine:" << std::endl;
+  ss << "  Name: " << name << std::endl;
+  ss << "  Inputs: [" << std::endl;
+  for (uint64_t i = 0; i < num_io.first; i++) {
+  ss << "    id: " << i << std::endl;
+  ss << "      shape: " << exec_ctx->getBindingDimensions(i) << std::endl;
+  ss << "      dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(i)) << std::endl;
+  }
+  ss << "  ]" << std::endl;
+  ss << "  Outputs: [" << std::endl;
+  for (uint64_t o = 0; o < num_io.second; o++) {
+  ss << "    id: " << o << std::endl;
+  ss << "    shape: " << exec_ctx->getBindingDimensions(o) << std::endl;
+  ss << "    dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl;
+  }
+  ss << "  ]" << std::endl;
+  ss << "  Device: " << device_info << std::endl;
+
+  return ss.str();
+}
+
+std::ostream& operator<<(std::ostream& os, const TRTEngine& engine) {
+  os << engine.to_str();
+  return os;
+}
+
 // TODO: Implement a call method
 // c10::List<at::Tensor> TRTEngine::Run(c10::List<at::Tensor> inputs) {
 //     auto input_vec = inputs.vec();
@@ -96,6 +133,7 @@ static auto TORCHTRT_UNUSED TRTEngineTSRegistrtion =
         .def(torch::init<std::vector<std::string>>())
         // TODO: .def("__call__", &TRTEngine::Run)
         // TODO: .def("run", &TRTEngine::Run)
+        .def("__str__", &TRTEngine::to_str)
         .def_pickle(
             [](const c10::intrusive_ptr<TRTEngine>& self) -> std::vector<std::string> {
               // Serialize TensorRT engine
diff --git a/core/runtime/runtime.h b/core/runtime/runtime.h
index 79ae74c91b..2d92fa4e00 100644
--- a/core/runtime/runtime.h
+++ b/core/runtime/runtime.h
@@ -59,6 +59,8 @@ struct TRTEngine : torch::CustomClassHolder {
   TRTEngine(std::vector<std::string> serialized_info);
   TRTEngine(std::string mod_name, std::string serialized_engine, CudaDevice cuda_device);
   TRTEngine& operator=(const TRTEngine& other);
+  std::string to_str() const;
+  friend std::ostream& operator<<(std::ostream& os, const TRTEngine& engine);
   // TODO: Implement a call method
   // c10::List<at::Tensor> Run(c10::List<at::Tensor> inputs);
 };

From 1c3c779414a739963f7b470cd520ff3a2e92a4b5 Mon Sep 17 00:00:00 2001
From: Naren Dasan <naren@narendasan.com>
Date: Tue, 26 Apr 2022 20:37:39 -0700
Subject: [PATCH 2/4] docs: update docs with binding name restrictions

Signed-off-by: Naren Dasan <narens@nvidia.com>
Signed-off-by: Naren Dasan <naren@narendasan.com>
---
 cpp/include/torch_tensorrt/torch_tensorrt.h | 7 ++++++-
 py/torch_tensorrt/ts/_compiler.py           | 5 +++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h
index ace05d33f5..eb11ad20cf 100644
--- a/cpp/include/torch_tensorrt/torch_tensorrt.h
+++ b/cpp/include/torch_tensorrt/torch_tensorrt.h
@@ -739,7 +739,12 @@ TORCHTRT_API std::string convert_method_to_trt_engine(
  * module. Registers execution of the engine as the forward method of the module
  * Forward is defined as: forward(Tensor[]) -> Tensor[]
  *
- * @return: A new module trageting a TensorRT engine
+ * TensorRT bindings must have names with the following format:
+ * - [symbol].[index in input / output array]
+ * ex.
+ * - [x.0, x.1, x.2] -> [y.0]
+ *
+ * @return: A new module targeting a TensorRT engine
  */
 TORCHTRT_API torch::jit::Module embed_engine_in_new_module(const std::string& engine, Device device);
 } // namespace torchscript
diff --git a/py/torch_tensorrt/ts/_compiler.py b/py/torch_tensorrt/ts/_compiler.py
index b895bf54b8..c0e88b99ce 100644
--- a/py/torch_tensorrt/ts/_compiler.py
+++ b/py/torch_tensorrt/ts/_compiler.py
@@ -207,6 +207,11 @@ def embed_engine_in_new_module(serialized_engine: bytes, device=Device._current_
 
         forward(Tensor[]) -> Tensor[]
 
+    TensorRT bindings must have names with the following format:
+      - [symbol].[index in input / output array]
+      ex.
+      - [x.0, x.1, x.2] -> [y.0]
+
     Module can be save with engine embedded with torch.jit.save and moved / loaded according to torch_tensorrt portability rules
 
     Arguments:

From 49d367da7b49fafd122d551c5a305d77d811d422 Mon Sep 17 00:00:00 2001
From: Naren Dasan <naren@narendasan.com>
Date: Thu, 5 May 2022 16:28:37 -0700
Subject: [PATCH 3/4] refactor: apply linting

Signed-off-by: Naren Dasan <narens@nvidia.com>
Signed-off-by: Naren Dasan <naren@narendasan.com>
---
 core/runtime/TRTEngine.cpp      | 17 ++++++++++-------
 tests/util/run_graph_engine.cpp |  4 ++--
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
index 5ba3a56830..70b51e0812 100644
--- a/core/runtime/TRTEngine.cpp
+++ b/core/runtime/TRTEngine.cpp
@@ -63,7 +63,10 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe
     auto delim = bind_name.find(".");
     if (delim == std::string::npos) {
       delim = bind_name.find("_");
-      TORCHTRT_CHECK(delim != std::string::npos, "Unable to determine binding index for input " << bind_name << "\nEnsure module was compile with Torch-TensorRT.ts");
+      TORCHTRT_CHECK(
+          delim != std::string::npos,
+          "Unable to determine binding index for input " << bind_name
+                                                         << "\nEnsure module was compile with Torch-TensorRT.ts");
     }
 
     std::string idx_s = bind_name.substr(delim + 1);
@@ -97,16 +100,16 @@ std::string TRTEngine::to_str() const {
   ss << "  Name: " << name << std::endl;
   ss << "  Inputs: [" << std::endl;
   for (uint64_t i = 0; i < num_io.first; i++) {
-  ss << "    id: " << i << std::endl;
-  ss << "      shape: " << exec_ctx->getBindingDimensions(i) << std::endl;
-  ss << "      dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(i)) << std::endl;
+    ss << "    id: " << i << std::endl;
+    ss << "      shape: " << exec_ctx->getBindingDimensions(i) << std::endl;
+    ss << "      dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(i)) << std::endl;
   }
   ss << "  ]" << std::endl;
   ss << "  Outputs: [" << std::endl;
   for (uint64_t o = 0; o < num_io.second; o++) {
-  ss << "    id: " << o << std::endl;
-  ss << "    shape: " << exec_ctx->getBindingDimensions(o) << std::endl;
-  ss << "    dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl;
+    ss << "    id: " << o << std::endl;
+    ss << "    shape: " << exec_ctx->getBindingDimensions(o) << std::endl;
+    ss << "    dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl;
   }
   ss << "  ]" << std::endl;
   ss << "  Device: " << device_info << std::endl;
diff --git a/tests/util/run_graph_engine.cpp b/tests/util/run_graph_engine.cpp
index df52b54b26..fe211f2baf 100644
--- a/tests/util/run_graph_engine.cpp
+++ b/tests/util/run_graph_engine.cpp
@@ -21,7 +21,7 @@ std::vector<core::ir::Input> toInputs(std::vector<at::Tensor> ten) {
   for (auto i : ten) {
     a.push_back(core::ir::Input(core::util::toVec(i.sizes())));
   }
-  return std::move(a);
+  return a;
 }
 
 std::vector<core::ir::Input> toInputsDynamic(std::vector<at::Tensor> ten, bool dynamic_batch) {
@@ -49,7 +49,7 @@ std::vector<core::ir::Input> toInputsDynamic(std::vector<at::Tensor> ten, bool d
     }
   }
 
-  return std::move(a);
+  return a;
 }
 
 std::vector<at::Tensor> RunEngine(std::string& eng, std::vector<at::Tensor> inputs) {

From 65af9d1b7ad2e1ac11eda5206279a8c666bb52ec Mon Sep 17 00:00:00 2001
From: Naren Dasan <naren@narendasan.com>
Date: Fri, 6 May 2022 18:29:55 -0700
Subject: [PATCH 4/4] refactor(//core/runtime): Updating the logging for
 runtime deserialization

NOTE: This does not fully address the deserialization issue. The root
cause is that TensorRT modifies the input binding names, which is what
leads to these std::stoi errors.

Signed-off-by: Naren Dasan <naren@narendasan.com>
Signed-off-by: Naren Dasan <narens@nvidia.com>
---
 core/ir/ir.cpp             |  2 +-
 core/runtime/TRTEngine.cpp | 10 ++++++----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/core/ir/ir.cpp b/core/ir/ir.cpp
index 1c1813ea5f..fcca3df33c 100644
--- a/core/ir/ir.cpp
+++ b/core/ir/ir.cpp
@@ -21,7 +21,7 @@ InputSpecMap pair_input_vals_with_specs(std::vector<const torch::jit::Value*> va
 
   std::unordered_map<const torch::jit::Value*, core::ir::Input> a;
   for (size_t i = 0; i < vals.size(); i++) {
-    LOG_DEBUG("Paring " << i << ": " << vals[i]->debugName() << " : " << specs[i]);
+    LOG_DEBUG("Pairing " << i << ": " << vals[i]->debugName() << " : " << specs[i]);
     a.insert({vals[i], specs[i]});
   }
   return a;
diff --git a/core/runtime/TRTEngine.cpp b/core/runtime/TRTEngine.cpp
index 70b51e0812..26c755cb19 100644
--- a/core/runtime/TRTEngine.cpp
+++ b/core/runtime/TRTEngine.cpp
@@ -60,13 +60,15 @@ TRTEngine::TRTEngine(std::string mod_name, std::string serialized_engine, CudaDe
 
   for (int64_t x = 0; x < cuda_engine->getNbBindings(); x++) {
     std::string bind_name = cuda_engine->getBindingName(x);
+    LOG_DEBUG("Binding name: " << bind_name);
     auto delim = bind_name.find(".");
     if (delim == std::string::npos) {
       delim = bind_name.find("_");
       TORCHTRT_CHECK(
           delim != std::string::npos,
-          "Unable to determine binding index for input " << bind_name
-                                                         << "\nEnsure module was compile with Torch-TensorRT.ts");
+          "Unable to determine binding index for input "
+              << bind_name
+              << "\nEnsure module was compiled with Torch-TensorRT.ts or follows Torch-TensorRT Runtime conventions");
     }
 
     std::string idx_s = bind_name.substr(delim + 1);
@@ -108,8 +110,8 @@ std::string TRTEngine::to_str() const {
   ss << "  Outputs: [" << std::endl;
   for (uint64_t o = 0; o < num_io.second; o++) {
     ss << "    id: " << o << std::endl;
-    ss << "    shape: " << exec_ctx->getBindingDimensions(o) << std::endl;
-    ss << "    dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl;
+    ss << "      shape: " << exec_ctx->getBindingDimensions(o) << std::endl;
+    ss << "      dtype: " << util::TRTDataTypeToScalarType(exec_ctx->getEngine().getBindingDataType(o)) << std::endl;
   }
   ss << "  ]" << std::endl;
   ss << "  Device: " << device_info << std::endl;