Example code for c++ conversion

tensorflow · Sep 8, 2021 · e6d0699 · e6d0699
1 parent 946d1c4
commit e6d0699
Show file tree

Hide file tree

Showing 13 changed files with 12,130 additions and 0 deletions.
diff --git a/tf_trt_cpp_example/CMakeLists.txt b/tf_trt_cpp_example/CMakeLists.txt
@@ -0,0 +1,24 @@
+cmake_minimum_required(VERSION 3.13)
+
+project(TF_TRT_Example)
+
+set(CMAKE_CXX_STANDARD 14)
+
+add_executable(tf_trt_example main.cc mnist.h mnist.cc trt_convert.cc freeze_saved_model.cc)
+
+# TensorFlow shared libraries the example links against.
+target_link_libraries(tf_trt_example tensorflow_cc)
+target_link_libraries(tf_trt_example tensorflow_framework)
+
+# Preprocessor definitions. _GLIBCXX_USE_CXX11_ABI=0 selects the old libstdc++
+# ABI; GOOGLE_CUDA / GOOGLE_TENSORRT are presumably required to enable the
+# GPU and TensorRT code paths in the TensorFlow headers (confirm against the
+# TensorFlow build used).
+target_compile_definitions(tf_trt_example PRIVATE _GLIBCXX_USE_CXX11_ABI=0 GOOGLE_CUDA GOOGLE_TENSORRT)
+
+# To find freeze_saved_model.h
+target_compile_options(tf_trt_example PRIVATE -I/opt/tensorflow/tensorflow-source)
+
+target_link_directories(tf_trt_example PRIVATE /usr/local/lib/python3.8/dist-packages/tensorflow)
+target_link_directories(tf_trt_example PRIVATE /usr/local/lib/tensorflow)
+
+# -Wl,-rpath is a linker flag: it has to be given as a link option, because
+# compile options are not part of the link command line.
+target_link_options(tf_trt_example PRIVATE -Wl,-rpath=/usr/local/lib/python3.8/dist-packages/tensorflow)
+
+target_include_directories(tf_trt_example PRIVATE /usr/local/lib/python3.8/dist-packages/tensorflow/include)
+target_include_directories(tf_trt_example PRIVATE /data/tensorflow-source/bazel-out/k8-opt/bin/external/local_config_cuda/cuda/_virtual_includes/cuda_headers_virtual)
+target_include_directories(tf_trt_example PRIVATE /data/tensorflow-source/bazel-out/k8-opt/bin/external/local_config_tensorrt/_virtual_includes/tensorrt_headers)
diff --git a/tf_trt_cpp_example/README.md b/tf_trt_cpp_example/README.md
@@ -0,0 +1,62 @@
+# TF-TRT example for conversion in C++
+
+## Introduction
+
+This directory contains example code to demonstrate the steps necessary to run TF-TRT conversion from C++. This is a work in progress.
+The goal of this example is to facilitate discussion on how to provide a convenient interface for initiating conversion from C++ and to test possible implementations.
+
+The steps for model conversion are:
+
+0. Inline functions
+1. Freeze the graph
+2. Run Grappler with TRTOptimizer pass
+3. Infer the graph to provide shape information
+4. Infer the graph to build the engines
+5. Convert the graph_def to have static engines
+
+Steps 3-4 are done in one go: the last input data that is provided for shape information also triggers building the engines.
+To implement these steps without rewriting the graph, we have added the n_build_pass converter option.
+Otherwise it would be necessary to change the `profile_generation_mode_` attribute of the graph to enable / disable profile information collection.
+
+
+The final example code will probably be hosted in the tensorflow/tensorrt repository, and this PR should contain only the necessary changes in the TF codebase.
+
+### Limitations
+- Freezing the model is not implemented correctly; the current example only works with frozen graphs.
+- Some of the intermediate steps are possibly inefficient.
+
+### Acknowledgment
+
+This example is based on https://github.com/bmzhao/saved-model-example
+
+## How to run
+
+### Build TF
+```
+git clone --branch trt_cpp_conversion_example https://github.com/tfeher/tensorflow.git tensorflow-source
+mkdir bazel-cache
+nvidia-docker run --rm -it --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v $PWD:/data -w /data -v $PWD/bazel-cache:/root/.cache/bazel nvcr.io/nvidia/tensorflow:21.06-tf2-py3
+
+# Inside the container
+cp /opt/tensorflow/nvbuild* /opt/tensorflow/bazel_build.sh .
+./nvbuild.sh --noclean --v2
+```
+
+### Build the TF-TRT example
+```
+cd tensorflow-source/tf_trt_cpp_example
+mkdir build
+cd build
+cmake ..
+make
+```
+
+### Train the model and save it
+```
+python mnist_train.py
+```
+
+### Run TF-TRT conversion and infer the converted model
+```
+TF_CPP_VMODULE=trt_convert=2,trt_optimization_pass=2,trt_engine_utils=2,trt_engine_op=2,segment=2,trt_shape_optimization_profiles=2,trt_lru_cache=2,convert_graph=2,trt_engine_resource_ops=2 ./tf_trt_example
+```
diff --git a/tf_trt_cpp_example/create_symlinks.sh b/tf_trt_cpp_example/create_symlinks.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+# Create missing symlinks
+pushd /usr/local/lib/tensorflow
+ln -s libtensorflow_cc.so.2 libtensorflow_cc.so
+cd /usr/local/lib/python3.8/dist-packages/tensorflow
+ln -s libtensorflow_framework.so.2 libtensorflow_framework.so
+popd
+
+# mkdir /usr/local/lib/python3.8/dist-packages/tensorflow/include/third_party/tensorrt
+# cp /usr/include/x86_64-linux-gnu/NvInfer.h /usr/local/lib/python3.8/dist-packages/tensorflow/include/third_party/tensorrt
diff --git a/tf_trt_cpp_example/freeze_saved_model.cc b/tf_trt_cpp_example/freeze_saved_model.cc
@@ -0,0 +1,237 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "freeze_saved_model.h"
+
+#include <iostream>
+#include <queue>
+
+#include "tensorflow/core/framework/attr_value.pb.h"
+#include "tensorflow/core/framework/function.pb.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/framework/node_def.pb.h"
+#include "tensorflow/core/framework/versions.pb.h"
+#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/protobuf/meta_graph.pb.h"
+#include <google/protobuf/map.h>
+
+namespace tensorflow {
+
+namespace {
+
+// Collects every tensor name referenced by `tensor_info` into `tensor_names`.
+// A COO sparse tensor contributes its values, indices and dense-shape tensor
+// names; a composite tensor contributes the name of each of its components;
+// any other tensor contributes its own name.
+void GetTensorNamesFromTensorInfo(const TensorInfo &tensor_info,
+                                  std::unordered_set<string> *tensor_names) {
+  if (tensor_info.has_coo_sparse()) {
+    // A sparse tensor is represented by three separate tensors; record all
+    // of them.
+    const TensorInfo_CooSparse &sparse = tensor_info.coo_sparse();
+    tensor_names->insert(sparse.values_tensor_name());
+    tensor_names->insert(sparse.indices_tensor_name());
+    tensor_names->insert(sparse.dense_shape_tensor_name());
+    return;
+  }
+  if (tensor_info.has_composite_tensor()) {
+    const auto &parts = tensor_info.composite_tensor().components();
+    for (const auto &part : parts) {
+      tensor_names->insert(part.name());
+    }
+    return;
+  }
+  tensor_names->insert(tensor_info.name());
+}
+
+// Accumulates into `inputs` and `outputs` the tensor names of the inputs and
+// outputs of every SignatureDef stored in `signature_def`.
+void GetSignatureDefsInputsAndOutputs(
+    const google::protobuf::Map<string, SignatureDef> &signature_def,
+    std::unordered_set<string> *inputs, std::unordered_set<string> *outputs) {
+  for (const auto &named_signature : signature_def) {
+    const SignatureDef &signature = named_signature.second;
+    for (const auto &named_input : signature.inputs()) {
+      GetTensorNamesFromTensorInfo(named_input.second, inputs);
+    }
+    for (const auto &named_output : signature.outputs()) {
+      GetTensorNamesFromTensorInfo(named_output.second, outputs);
+    }
+  }
+}
+
+// Fills `name_to_node_map` with one entry per node of `graph_def`, keyed by
+// node name and pointing at the mutable NodeDef stored inside `graph_def`.
+// If several nodes share a name, the entry refers to the last one.
+void GetNodeNameToNodeDefMap(
+    GraphDef *graph_def,
+    std::unordered_map<string, NodeDef *> *name_to_node_map) {
+  // Iterating the repeated field directly avoids comparing an unsigned index
+  // with the signed value returned by node_size().
+  for (NodeDef &node : *graph_def->mutable_node()) {
+    (*name_to_node_map)[node.name()] = &node;
+  }
+}
+
+// Strips off the tensor part of the tensor_name to get the node_name: a
+// leading '^' (the prefix of a control input in a GraphDef) is dropped, and
+// everything from the first ':' onwards is discarded. An empty name, or a
+// name consisting only of "^", yields an empty node name.
+const string GetNodeNameFromTensorName(string tensor_name) {
+  if (!tensor_name.empty() && tensor_name[0] == '^') {
+    tensor_name.erase(0, 1);
+  }
+  // Cut at the first ':' directly rather than taking element 0 of a split
+  // result, which is not safe when the split produces no parts.
+  const auto colon_pos = tensor_name.find(':');
+  if (colon_pos != string::npos) {
+    tensor_name.erase(colon_pos);
+  }
+  return tensor_name;
+}
+
+// Walks the graph backwards from `outputs` and records the name of every node
+// reached in `reachable_node_names`. Reached nodes whose op is "Variable",
+// "VariableV2" or "VarHandleOp" are additionally recorded in
+// `variable_node_names`.
+// Node names are resolved with `name_to_node_map.at()`, so a name that is
+// missing from the map raises std::out_of_range.
+void GetReachableNodesAndVariables(
+    GraphDef *graph_def, const std::unordered_set<string> &outputs,
+    const std::unordered_map<string, NodeDef *> &name_to_node_map,
+    std::unordered_set<string> *reachable_node_names,
+    std::unordered_set<string> *variable_node_names) {
+  // TODO(suharshs): Add support for ResourceVariables.
+  static const std::unordered_set<string> *kVariableTypes =
+      new std::unordered_set<string>({"Variable", "VariableV2", "VarHandleOp"});
+
+  // Seed the work queue with the nodes producing the requested outputs.
+  std::queue<string> pending;
+  for (const string &output_name : outputs) {
+    pending.push(GetNodeNameFromTensorName(output_name));
+  }
+
+  // Breadth-first traversal from the outputs towards their inputs.
+  while (!pending.empty()) {
+    const string current = pending.front();
+    pending.pop();
+    // insert() reports whether the name was new; already visited nodes are
+    // skipped.
+    if (!reachable_node_names->insert(current).second) {
+      continue;
+    }
+    const NodeDef *node = name_to_node_map.at(current);
+    if (kVariableTypes->count(node->op()) > 0) {
+      variable_node_names->insert(node->name());
+    }
+    for (const string &input_name : node->input()) {
+      pending.push(GetNodeNameFromTensorName(input_name));
+    }
+  }
+}
+
+// Fetches the value of every variable named in `variable_names_set` by
+// running `session`, and stores the values in `variable_name_to_value_map`
+// keyed by variable node name.
+// Returns OK without running the session when the set is empty. Otherwise
+// returns the error from Session::Run if it fails, or an Internal error if
+// the number of fetched tensors differs from the number of variables.
+// Variable names are resolved with `name_to_node_map.at()`, so a name that is
+// missing from the map raises std::out_of_range.
+Status GetVariableNameToTensorMap(
+    Session *session,
+    const std::unordered_map<string, NodeDef *> &name_to_node_map,
+    const std::unordered_set<string> &variable_names_set,
+    std::unordered_map<string, Tensor> *variable_name_to_value_map) {
+  if (variable_names_set.empty()) {
+    return Status::OK();
+  }
+  // Parallel vectors: variable_names[i] is fetched through tensor_names[i].
+  std::vector<string> variable_names;
+  variable_names.reserve(variable_names_set.size());
+  std::vector<string> tensor_names;
+  tensor_names.reserve(variable_names_set.size());
+  for (const string &node_name : variable_names_set) {
+    variable_names.push_back(node_name);
+    const NodeDef *node_def = name_to_node_map.at(node_name);
+    if (node_def->op() == "VarHandleOp") {
+      // If this is a resource variable, we have to run the corresponding
+      // ReadVariableOp.
+      // NOTE(review): assumes the graph contains a node named
+      // "<variable>/Read/ReadVariableOp" - confirm for the models in use.
+      tensor_names.push_back(node_name + "/Read/ReadVariableOp:0");
+    } else {
+      tensor_names.push_back(node_name + ":0");
+    }
+  }
+  std::vector<Tensor> outputs;
+  TF_RETURN_IF_ERROR(
+      session->Run(/* inputs */ {}, tensor_names, /* targets */ {}, &outputs));
+  // Guard the indexing below against an unexpected number of results.
+  if (outputs.size() != variable_names.size()) {
+    return errors::Internal("Expected ", variable_names.size(),
+                            " variable values but Session::Run returned ",
+                            outputs.size());
+  }
+  for (size_t i = 0; i < variable_names.size(); i++) {
+    (*variable_name_to_value_map)[variable_names[i]] = outputs[i];
+  }
+  return Status::OK();
+}
+
+// Populates `const_node` as a "Const" node standing in for `variable_node`:
+// it takes over the variable's name and "dtype" attribute, and its "value"
+// attribute is filled from `variable_value`.
+void ConvertVariableToConstant(const NodeDef &variable_node,
+                               const Tensor &variable_value,
+                               NodeDef *const_node) {
+  const_node->set_name(variable_node.name());
+  const_node->set_op("Const");
+  auto &const_attrs = *const_node->mutable_attr();
+  const_attrs["dtype"] = variable_node.attr().at("dtype");
+  variable_value.AsProtoTensorContent(const_attrs["value"].mutable_tensor());
+}
+
+// Populates `identity_node` as an "Identity" node standing in for `node`: it
+// takes over the name and first input of `node`, and its "T" attribute is set
+// from the "dtype" attribute of `node`.
+void ConvertReadVariableOpToIdentity(const NodeDef &node,
+                                     NodeDef *identity_node) {
+  auto &identity_attrs = *identity_node->mutable_attr();
+  identity_attrs["T"] = node.attr().at("dtype");
+  identity_node->set_op("Identity");
+  identity_node->set_name(node.name());
+  identity_node->add_input(node.input(0));
+}
+
+// Writes into `frozen_graph_def` the subgraph of `meta_graph_def` needed to
+// compute `outputs`. Versions and function library are copied unchanged.
+// Reachable variable nodes become Const nodes holding the values fetched via
+// `session`; ReadVariableOp nodes whose first input is such a variable become
+// Identity nodes; every other reachable node is copied unchanged.
+// Returns the error from fetching the variable values, or OK.
+Status FreezeGraphDef(const MetaGraphDef &meta_graph_def, Session *session,
+                      const std::unordered_set<string> &outputs,
+                      GraphDef *frozen_graph_def) {
+  // Work on a private copy: the lookup map built below stores mutable
+  // pointers into the graph.
+  GraphDef graph_def = meta_graph_def.graph_def();
+  *frozen_graph_def->mutable_versions() = graph_def.versions();
+  *frozen_graph_def->mutable_library() = graph_def.library();
+  // An empty graph has nothing to freeze.
+  if (graph_def.node_size() == 0) {
+    return Status::OK();
+  }
+
+  // Name -> node lookup, used to follow inputs during the backwards
+  // traversal from the outputs.
+  std::unordered_map<string, NodeDef *> name_to_node_map;
+  GetNodeNameToNodeDefMap(&graph_def, &name_to_node_map);
+
+  std::unordered_set<string> reachable_node_names;
+  std::unordered_set<string> variable_node_names;
+  GetReachableNodesAndVariables(&graph_def, outputs, name_to_node_map,
+                                &reachable_node_names, &variable_node_names);
+
+  std::unordered_map<string, Tensor> variable_to_value_map;
+  TF_RETURN_IF_ERROR(GetVariableNameToTensorMap(
+      session, name_to_node_map, variable_node_names, &variable_to_value_map));
+
+  // Emit the nodes in the order in which they appear in the original graph.
+  for (const NodeDef &node : graph_def.node()) {
+    const string &name = node.name();
+    if (reachable_node_names.count(name) == 0) {
+      continue;
+    }
+    if (variable_node_names.count(name) > 0) {
+      VLOG(1) << "Freezing variable " << node.name();
+      ConvertVariableToConstant(node, variable_to_value_map[name],
+                                frozen_graph_def->add_node());
+      continue;
+    }
+    // A ReadVariableOp fed by a frozen variable now reads from a Const node,
+    // so it is turned into an Identity.
+    const bool reads_frozen_variable =
+        node.op() == "ReadVariableOp" &&
+        variable_node_names.count(node.input(0)) > 0;
+    if (reads_frozen_variable) {
+      VLOG(2) << "Converting read op to identity " << node.name();
+      ConvertReadVariableOpToIdentity(node, frozen_graph_def->add_node());
+      continue;
+    }
+    // Anything else is copied as-is.
+    *frozen_graph_def->add_node() = node;
+  }
+  return Status::OK();
+}
+
+} // namespace
+
+// Collects the tensor names of all SignatureDef inputs and outputs of
+// `meta_graph_def` into `inputs` and `outputs`, then freezes the subgraph
+// needed by `outputs` into `frozen_graph_def` using `session`.
+// Returns the status of the freezing step.
+Status FreezeSavedModel(const MetaGraphDef &meta_graph_def, Session *session,
+                        GraphDef *frozen_graph_def,
+                        std::unordered_set<string> *inputs,
+                        std::unordered_set<string> *outputs) {
+  GetSignatureDefsInputsAndOutputs(meta_graph_def.signature_def(), inputs,
+                                   outputs);
+  return FreezeGraphDef(meta_graph_def, session, *outputs, frozen_graph_def);
+}
+
+} // namespace tensorflow
diff --git a/tf_trt_cpp_example/freeze_saved_model.h b/tf_trt_cpp_example/freeze_saved_model.h
@@ -0,0 +1,47 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_
+#define TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_
+
+#include <unordered_set>
+
+#include "tensorflow/cc/saved_model/loader.h"
+#include "tensorflow/core/framework/graph.pb.h"
+#include "tensorflow/core/lib/core/status.h"
+
+namespace tensorflow {
+
+// Builds a frozen GraphDef from the MetaGraphDef of a loaded SavedModel.
+// `inputs` and `outputs` receive the union of all input and output tensor
+// names found in the SignatureDefs of `meta_graph_def`.
+// `frozen_graph_def` is set to a GraphDef of all nodes needed by `outputs`.
+// Variables among those nodes are converted to constants, set to the value
+// of the variables, by running `session`.
+// WARNING: Only the variable checkpoints will be reflected in the frozen
+// graph_def. All saved_model assets will be ignored.
+//
+// Commented-out SavedModelBundle-based signature, kept for reference:
+// Status FreezeSavedModel(const SavedModelBundle &saved_model_bundle,
+//                         GraphDef *frozen_graph_def,
+//                         std::unordered_set<string> *inputs,
+//                         std::unordered_set<string> *outputs);
+
+Status FreezeSavedModel(const MetaGraphDef &meta_graph_def, Session *session,
+                        GraphDef *frozen_graph_def,
+                        std::unordered_set<string> *inputs,
+                        std::unordered_set<string> *outputs);
+} // namespace tensorflow
+
+#endif // TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_