Skip to content

Commit

Permalink
Example code for c++ conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
tfeher committed Sep 8, 2021
1 parent 946d1c4 commit e6d0699
Show file tree
Hide file tree
Showing 13 changed files with 12,130 additions and 0 deletions.
24 changes: 24 additions & 0 deletions tf_trt_cpp_example/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Build script for the TF-TRT C++ conversion example.
# All paths below are hard-coded for the container layout described in the
# README; adjust them when building elsewhere.
cmake_minimum_required(VERSION 3.13)

project(TF_TRT_Example)

set(CMAKE_CXX_STANDARD 14)

add_executable(tf_trt_example main.cc mnist.h mnist.cc trt_convert.cc freeze_saved_model.cc)

# TensorFlow C++ API and framework shared libraries.
target_link_libraries(tf_trt_example tensorflow_cc)
target_link_libraries(tf_trt_example tensorflow_framework)

# NOTE(review): the ABI define presumably has to match the setting the
# TensorFlow binaries were built with -- confirm when changing TF builds.
target_compile_options(tf_trt_example PRIVATE -D_GLIBCXX_USE_CXX11_ABI=0 -DGOOGLE_CUDA -DGOOGLE_TENSORRT)

# To find freeze_saved_model.h
target_compile_options(tf_trt_example PRIVATE -I/opt/tensorflow/tensorflow-source)

# Directories searched for the libraries named in target_link_libraries above.
target_link_directories(tf_trt_example PRIVATE /usr/local/lib/python3.8/dist-packages/tensorflow)
target_link_directories(tf_trt_example PRIVATE /usr/local/lib/tensorflow)

# -Wl,... is a linker flag: it has no effect when passed as a compile option,
# so it must be attached to the link step for the rpath to end up in the binary.
target_link_options(tf_trt_example PRIVATE -Wl,-rpath=/usr/local/lib/python3.8/dist-packages/tensorflow)

# TensorFlow headers plus the CUDA / TensorRT headers generated by the bazel build.
target_include_directories(tf_trt_example PRIVATE /usr/local/lib/python3.8/dist-packages/tensorflow/include)
target_include_directories(tf_trt_example PRIVATE /data/tensorflow-source/bazel-out/k8-opt/bin/external/local_config_cuda/cuda/_virtual_includes/cuda_headers_virtual)
target_include_directories(tf_trt_example PRIVATE /data/tensorflow-source/bazel-out/k8-opt/bin/external/local_config_tensorrt/_virtual_includes/tensorrt_headers)
62 changes: 62 additions & 0 deletions tf_trt_cpp_example/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# TF-TRT example for conversion in C++

## Introduction

This directory contains example code to demonstrate the steps necessary to run TF-TRT conversion from C++. This is a work in progress.
The goal of this example is to facilitate discussion on how to provide a convenient interface for initiating conversion from C++ and to test possible implementations.

The steps for model conversion are:

0. Inline functions
1. Freeze the graph
2. Run Grappler with TRTOptimizer pass
3. Infer the graph to provide shape information
4. Infer the graph to build the engines
5. Convert the graph_def to have static engines

Steps 3 and 4 are done in one go: the last input data that is provided for shape information also triggers building the engines.
To implement these steps without rewriting the graph, we have added the `n_build_pass` converter option.
Otherwise, it would be necessary to change the `profile_generation_mode_` attribute of the graph to enable or disable profile information collection.


The final example code will probably be hosted in the tensorflow/tensorrt repository, and this PR will contain only the necessary changes to the TF codebase.

### Limitations
- Freezing the model is not implemented correctly; the current example only works with frozen graphs.
- Some of the intermediate steps are possibly inefficient.

### Acknowledgment

This example is based on https://github.com/bmzhao/saved-model-example

## How to run

### Build TF
```
git clone --branch trt_cpp_conversion_example https://github.com/tfeher/tensorflow.git tensorflow-source
mkdir bazel-cache
nvidia-docker run --rm -it --shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 -v $PWD:/data -w /data -v $PWD/bazel-cache:/root/.cache/bazel nvcr.io/nvidia/tensorflow:21.06-tf2-py3
# Inside the container
cp /opt/tensorflow/nvbuild* /opt/tensorflow/bazel_build.sh .
./nvbuild.sh --noclean --v2
```

### Build the TF-TRT example
```
cd tensorflow-source/tf_trt_cpp_example
mkdir build
cd build
cmake ..
make
```

### Train the model and save it
```
python mnist_train.py
```

### Run TF-TRT conversion and infer the converted model
```
TF_CPP_VMODULE=trt_convert=2,trt_optimization_pass=2,trt_engine_utils=2,trt_engine_op=2,segment=2,trt_shape_optimization_profiles=2,trt_lru_cache=2,convert_graph=2,trt_engine_resource_ops=2 ./tf_trt_example
```
11 changes: 11 additions & 0 deletions tf_trt_cpp_example/create_symlinks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash
#
# Creates the unversioned shared-library symlinks (libtensorflow_cc.so and
# libtensorflow_framework.so) next to their versioned ".so.2" counterparts.
# NOTE(review): presumably needed so that linking against tensorflow_cc /
# tensorflow_framework by name resolves -- confirm against the build setup.
#
# The link names are given as absolute paths, so a missing directory makes the
# corresponding `ln` fail instead of silently creating the link in whatever
# directory happens to be current. `-f` replaces an existing link so the script
# can be re-run; `-n` keeps `ln` from descending into an existing link.
# The link targets stay relative and are resolved next to the link itself.

status=0

# Create missing symlinks
ln -sfn libtensorflow_cc.so.2 /usr/local/lib/tensorflow/libtensorflow_cc.so || status=1
ln -sfn libtensorflow_framework.so.2 /usr/local/lib/python3.8/dist-packages/tensorflow/libtensorflow_framework.so || status=1

# mkdir /usr/local/lib/python3.8/dist-packages/tensorflow/include/third_party/tensorrt
# cp /usr/include/x86_64-linux-gnu/NvInfer.h /usr/local/lib/python3.8/dist-packages/tensorflow/include/third_party/tensorrt

# Report failure to the caller if any link could not be created.
exit "$status"
237 changes: 237 additions & 0 deletions tf_trt_cpp_example/freeze_saved_model.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "freeze_saved_model.h"

#include <iostream>
#include <queue>

#include "tensorflow/core/framework/attr_value.pb.h"
#include "tensorflow/core/framework/function.pb.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/versions.pb.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/protobuf/meta_graph.pb.h"
#include <google/protobuf/map.h>

namespace tensorflow {

namespace {

// Gets tensor names from tensor_info and inserts them into the set of tensor
// names.
void GetTensorNamesFromTensorInfo(const TensorInfo &tensor_info,
std::unordered_set<string> *tensor_names) {
if (tensor_info.has_coo_sparse()) {
// If the tensor is sparse we have to add all three tensors of the sparse
// representations.
const TensorInfo_CooSparse &coo_sparse = tensor_info.coo_sparse();
tensor_names->insert(coo_sparse.values_tensor_name());
tensor_names->insert(coo_sparse.indices_tensor_name());
tensor_names->insert(coo_sparse.dense_shape_tensor_name());
} else if (tensor_info.has_composite_tensor()) {
for (const auto &component : tensor_info.composite_tensor().components()) {
tensor_names->insert(component.name());
}
} else {
tensor_names->insert(tensor_info.name());
}
}

// Walks every SignatureDef in `signature_def` and accumulates the tensor names
// of all their inputs into `inputs` and of all their outputs into `outputs`
// (i.e. the union over all signatures).
void GetSignatureDefsInputsAndOutputs(
    const google::protobuf::Map<string, SignatureDef> &signature_def,
    std::unordered_set<string> *inputs, std::unordered_set<string> *outputs) {
  for (const auto &named_signature : signature_def) {
    const SignatureDef &signature = named_signature.second;

    for (const auto &named_input : signature.inputs()) {
      GetTensorNamesFromTensorInfo(named_input.second, inputs);
    }

    for (const auto &named_output : signature.outputs()) {
      GetTensorNamesFromTensorInfo(named_output.second, outputs);
    }
  }
}

// Fills `name_to_node_map` with an entry for every node of `graph_def`, keyed
// by node name and pointing at the (mutable) NodeDef stored inside `graph_def`.
// If several nodes share a name, the last one in graph order wins.
// The stored pointers refer into `graph_def`, so they are only usable while
// that graph is alive and its node list is not modified.
void GetNodeNameToNodeDefMap(
    GraphDef *graph_def,
    std::unordered_map<string, NodeDef *> *name_to_node_map) {
  // Iterate the repeated field directly; this avoids comparing an unsigned
  // index against the signed int returned by node_size().
  for (NodeDef &node : *graph_def->mutable_node()) {
    (*name_to_node_map)[node.name()] = &node;
  }
}

// Strips off the tensor part of the tensor_name to get the node_name.
//
// A leading '^' (control-dependency marker) is dropped, and everything from
// the first ':' onwards (the output index) is discarded:
//   "foo:0" -> "foo",  "^foo" -> "foo",  "foo" -> "foo",  "" -> "".
//
// The name is cut with find/substr instead of splitting into a vector and
// taking element 0, so an empty name can never index into an empty vector and
// no temporary vector is allocated.
const std::string GetNodeNameFromTensorName(std::string tensor_name) {
  if (!tensor_name.empty() && tensor_name[0] == '^') {
    tensor_name.erase(0, 1);
  }
  // find() yields npos when there is no ':'; substr(0, npos) is the whole name.
  return tensor_name.substr(0, tensor_name.find(':'));
}

// Starting from the nodes that produce `outputs`, walks the graph backwards
// along node inputs and records:
//   - in `reachable_node_names`: the name of every node visited, and
//   - in `variable_node_names`: the subset of those whose op is one of the
//     variable ops listed below.
// Node lookups go through `name_to_node_map`; a name that is missing from the
// map makes `at()` throw std::out_of_range.
void GetReachableNodesAndVariables(
    GraphDef *graph_def, const std::unordered_set<string> &outputs,
    const std::unordered_map<string, NodeDef *> &name_to_node_map,
    std::unordered_set<string> *reachable_node_names,
    std::unordered_set<string> *variable_node_names) {
  // TODO(suharshs): Add support for ResourceVariables.
  // Heap-allocated once and never freed.
  static const std::unordered_set<string> *kVariableOps =
      new std::unordered_set<string>({"Variable", "VariableV2", "VarHandleOp"});

  // Seed the work queue with the nodes that produce the requested outputs.
  std::queue<string> pending;
  for (const string &tensor_name : outputs) {
    pending.push(GetNodeNameFromTensorName(tensor_name));
  }

  // Breadth-first traversal from the outputs towards the graph inputs.
  while (!pending.empty()) {
    // Copy before pop(): popping destroys the queue's front element.
    const string current = pending.front();
    pending.pop();

    // insert() reports whether the name was new; skip already-visited nodes.
    const bool first_visit = reachable_node_names->insert(current).second;
    if (!first_visit) {
      continue;
    }

    NodeDef *current_node = name_to_node_map.at(current);
    if (kVariableOps->count(current_node->op()) > 0) {
      variable_node_names->insert(current_node->name());
    }

    for (const string &producer : current_node->input()) {
      pending.push(GetNodeNameFromTensorName(producer));
    }
  }
}

// Gets a map from variable name to variable value.
//
// For every node name in `variable_names_set`, fetches the current value of
// the variable by running `session` and stores it in
// `variable_name_to_value_map` under the node name.
//   - VarHandleOp nodes are read through the tensor
//     "<name>/Read/ReadVariableOp:0"; the graph must contain a read op with
//     that name for the fetch to succeed.
//   - All other variable nodes are read through "<name>:0".
// Returns OK without touching the session when the set is empty; otherwise
// returns the status of Session::Run. A name missing from `name_to_node_map`
// makes `at()` throw std::out_of_range.
//
// `variable_names_set` is taken by const reference: it is only read, so there
// is no reason to copy the whole set on every call.
Status GetVariableNameToTensorMap(
    Session *session,
    const std::unordered_map<string, NodeDef *> &name_to_node_map,
    const std::unordered_set<string> &variable_names_set,
    std::unordered_map<string, Tensor> *variable_name_to_value_map) {
  if (variable_names_set.empty()) {
    return Status::OK();
  }
  // Parallel vectors: tensor_names[i] is the tensor to fetch for
  // variable_names[i]. The names are copied into a vector so that the order
  // used for fetching is the same one used to read back the results.
  std::vector<string> variable_names;
  variable_names.reserve(variable_names_set.size());
  std::vector<string> tensor_names;
  tensor_names.reserve(variable_names_set.size());
  for (const string &node_name : variable_names_set) {
    variable_names.push_back(node_name);
    const NodeDef *node_def = name_to_node_map.at(node_name);
    if (node_def->op() == "VarHandleOp") {
      // If this is a resource variable, we have to run the corresponding
      // ReadVariableOp.
      tensor_names.push_back(node_name + "/Read/ReadVariableOp:0");
    } else {
      tensor_names.push_back(node_name + ":0");
    }
  }
  std::vector<Tensor> outputs;
  TF_RETURN_IF_ERROR(
      session->Run(/* inputs */ {}, tensor_names, /* targets */ {}, &outputs));
  for (size_t i = 0; i < variable_names.size(); i++) {
    (*variable_name_to_value_map)[variable_names[i]] = outputs[i];
  }
  return Status::OK();
}

// Converts a Variable NodeDef into a Constant NodeDef.
void ConvertVariableToConstant(const NodeDef &variable_node,
const Tensor &variable_value,
NodeDef *const_node) {
const_node->set_name(variable_node.name());
const_node->set_op("Const");
(*const_node->mutable_attr())["dtype"] = variable_node.attr().at("dtype");
variable_value.AsProtoTensorContent(
(*const_node->mutable_attr())["value"].mutable_tensor());
}

// Converts a ReadVariableOp NodeDef to an Identity NodeDef.
void ConvertReadVariableOpToIdentity(const NodeDef &node,
NodeDef *identity_node) {
identity_node->set_name(node.name());
identity_node->set_op("Identity");
(*identity_node->mutable_attr())["T"] = node.attr().at("dtype");
identity_node->add_input(node.input(0));
}

// Builds in `frozen_graph_def` the subgraph of `meta_graph_def`'s graph that
// is needed to compute `outputs`, with variables replaced by constants.
//
// Steps:
//   1. Copy `versions` and `library` unchanged.
//   2. Find all nodes reachable backwards from `outputs`, and the variable
//      nodes among them.
//   3. Read the current variable values by running `session`.
//   4. Emit the reachable nodes in their original order, turning variable
//      nodes into Const nodes and ReadVariableOp nodes whose first input is
//      one of those variables into Identity nodes.
// Returns the error from reading the variables, or OK.
Status FreezeGraphDef(const MetaGraphDef &meta_graph_def, Session *session,
                      const std::unordered_set<string> &outputs,
                      GraphDef *frozen_graph_def) {
  // Work on a private copy: the helpers below take mutable NodeDef pointers.
  GraphDef source_graph = meta_graph_def.graph_def();

  // Versions and function library are carried over verbatim.
  *frozen_graph_def->mutable_versions() = source_graph.versions();
  *frozen_graph_def->mutable_library() = source_graph.library();

  // Nothing to freeze in an empty graph.
  if (source_graph.node_size() == 0) {
    return Status::OK();
  }

  // Name -> NodeDef index, used to follow input edges during the traversal.
  std::unordered_map<string, NodeDef *> nodes_by_name;
  GetNodeNameToNodeDefMap(&source_graph, &nodes_by_name);

  std::unordered_set<string> reachable;
  std::unordered_set<string> variables;
  GetReachableNodesAndVariables(&source_graph, outputs, nodes_by_name,
                                &reachable, &variables);

  std::unordered_map<string, Tensor> variable_values;
  TF_RETURN_IF_ERROR(GetVariableNameToTensorMap(session, nodes_by_name,
                                                variables, &variable_values));

  // Emit nodes in the order they appear in the source graph.
  for (int i = 0; i < source_graph.node_size(); ++i) {
    const NodeDef &node = source_graph.node(i);

    if (reachable.count(node.name()) == 0) {
      continue;  // Not needed by any requested output.
    }

    if (variables.count(node.name()) > 0) {
      VLOG(1) << "Freezing variable " << node.name();
      ConvertVariableToConstant(node, variable_values[node.name()],
                                frozen_graph_def->add_node());
      continue;
    }

    // A ReadVariableOp whose VarHandleOp input becomes a constant has to
    // become an Identity. input(0) is only inspected for ReadVariableOp nodes.
    const bool reads_frozen_variable =
        node.op() == "ReadVariableOp" && variables.count(node.input(0)) > 0;
    if (reads_frozen_variable) {
      VLOG(2) << "Converting read op to identity " << node.name();
      ConvertReadVariableOpToIdentity(node, frozen_graph_def->add_node());
      continue;
    }

    // Every other reachable node is copied unchanged.
    *frozen_graph_def->add_node() = node;
  }

  return Status::OK();
}

} // namespace

// Public entry point. Collects the union of all SignatureDef inputs and
// outputs of `meta_graph_def` into `inputs` / `outputs`, then freezes the
// subgraph needed by `outputs` into `frozen_graph_def`, reading variable
// values through `session`. Returns the status of the freezing step.
Status FreezeSavedModel(const MetaGraphDef &meta_graph_def, Session *session,
                        GraphDef *frozen_graph_def,
                        std::unordered_set<string> *inputs,
                        std::unordered_set<string> *outputs) {
  const auto &signatures = meta_graph_def.signature_def();
  GetSignatureDefsInputsAndOutputs(signatures, inputs, outputs);

  // The freeze status (OK or error) is the overall result.
  return FreezeGraphDef(meta_graph_def, session, *outputs, frozen_graph_def);
}

} // namespace tensorflow
47 changes: 47 additions & 0 deletions tf_trt_cpp_example/freeze_saved_model.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#ifndef TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_
#define TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_

#include <unordered_set>

#include "tensorflow/cc/saved_model/loader.h"
#include "tensorflow/core/framework/graph.pb.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

// Produces a frozen GraphDef, plus the input and output tensor names, from a
// MetaGraphDef and a Session.
//
// `inputs` and `outputs` receive the union of all inputs and outputs in the
// SignatureDefs of `meta_graph_def`.
// FreezeSavedModel sets `frozen_graph_def` to a GraphDef of all nodes needed by
// `outputs`. The variables among those nodes are converted to constants, set
// to the value obtained by running `session`.
// NOTE(review): `session` presumably has to be a session in which the model's
// variables have already been restored -- confirm against the caller.
// WARNING: Only the variable checkpoints will be reflected in the frozen
// graph_def. All saved_model assets will be ignored.
//
// A SavedModelBundle-based variant of this signature, currently disabled, is
// kept here for reference:
// Status FreezeSavedModel(const SavedModelBundle &saved_model_bundle,
// GraphDef *frozen_graph_def,
// std::unordered_set<string> *inputs,
// std::unordered_set<string> *outputs);

Status FreezeSavedModel(const MetaGraphDef &meta_graph_def, Session *session,
GraphDef *frozen_graph_def,
std::unordered_set<string> *inputs,
std::unordered_set<string> *outputs);
} // namespace tensorflow

#endif // TENSORFLOW_CC_TOOLS_FREEZE_SAVED_MODEL_H_

0 comments on commit e6d0699

Please sign in to comment.