TFTRT: Support LeakyRelu op #25202

Merged
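
Until TensorRT's native kLEAKY_RELU activation performs well enough (see the TODO in convert_nodes.cc below), the converter lowers LeakyRelu into two elementwise layers: a kPROD against a broadcastable scalar constant that computes alpha * x, followed by a kMAX that computes max(x, alpha * x). A minimal standalone sketch of the identity this relies on (the helper name here is hypothetical and not part of this change):

```cpp
#include <algorithm>
#include <cassert>

// Reference semantics the converter relies on:
// LeakyRelu(x) = max(x, alpha * x), valid for 0 <= alpha <= 1
// (which is why ConvertLeakyRelu rejects alpha outside that range).
float LeakyReluReference(float x, float alpha) {
  return std::max(x, alpha * x);
}

int main() {
  assert(LeakyReluReference(3.0f, 0.5f) == 3.0f);    // positive inputs pass through
  assert(LeakyReluReference(-5.0f, 0.5f) == -2.5f);  // negative inputs are scaled by alpha
  return 0;
}
```
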
1 change: 1 addition & 0 deletions tensorflow/compiler/tf2tensorrt/convert/convert_graph.cc
@@ -105,6 +105,7 @@ Status TrtCandidateSelector::IsTensorRTCandidate(const tensorflow::Node* node) {
"FusedBatchNorm",
"FusedBatchNormV2",
"Identity",
"LeakyRelu",
"Log",
"MatMul",
"Max",
106 changes: 82 additions & 24 deletions tensorflow/compiler/tf2tensorrt/convert/convert_nodes.cc
@@ -351,6 +351,26 @@ nvinfer1::ITensor* Converter::CreateConstantLayer(
return trt_tensor;
}

tensorflow::Status CreateBroadcastableScalarConstant(
OpConverterParams* params, float value, const nvinfer1::Dims& dims,
const nvinfer1::ITensor** tensor) {
// In order to be broadcastable, the number of dims has to match.
nvinfer1::Dims broadcastable_dims(dims);
for (int i = 0; i < broadcastable_dims.nbDims; i++) {
broadcastable_dims.d[i] = 1;
}
TRT_ShapedWeights weights = params->weight_store->GetTempWeights(
tensorflow::DataType::DT_FLOAT, broadcastable_dims);
auto weights_ptr =
static_cast<float*>(const_cast<void*>(weights.GetValues()));
weights_ptr[0] = value;
*tensor = params->converter->CreateConstantLayer(weights, broadcastable_dims);
TFTRT_RETURN_ERROR_IF_NULLPTR(*tensor, params->node_def.name());
params->converter->ProvideQuantizationRange(
const_cast<nvinfer1::ITensor*>(*tensor), value, value);
return Status::OK();
}

inline bool DimsEqual(const nvinfer1::Dims& dim_l,
const nvinfer1::Dims& dim_r) {
if (dim_l.nbDims != dim_r.nbDims) {
@@ -2430,6 +2450,57 @@ tensorflow::Status ConvertPool(OpConverterParams* params) {
return tensorflow::Status::OK();
}

// TODO(tmorris): Use ActivationType::kLEAKY_RELU in TRT 5.1+ once perf
// improves.
tensorflow::Status ConvertLeakyRelu(OpConverterParams* params) {
const auto& inputs = params->inputs;
const auto& node_def = params->node_def;
if (inputs.size() != 1) {
return tensorflow::errors::InvalidArgument(
node_def.op(), " expects one input, at ", node_def.name());
}
if (!inputs.at(0).is_tensor()) {
return tensorflow::errors::Unimplemented(
node_def.op(), " is only implemented for tensors, at ",
node_def.name());
}
TFAttrs attrs(node_def);
const float alpha = attrs.get<float>("alpha");
if (alpha < 0.0f || alpha > 1.0f) {
return tensorflow::errors::Unimplemented(
"Alpha value for LeakyRelu must be between 0 and 1, at ",
node_def.name());
}
if (params->validation_only) return tensorflow::Status::OK();

// Input Tensor
const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
// Create const for alpha.
const nvinfer1::ITensor* const_alpha_tensor = nullptr;
TF_RETURN_IF_ERROR(CreateBroadcastableScalarConstant(
params, alpha, tensor->getDimensions(), &const_alpha_tensor));
// alpha * x
nvinfer1::IElementWiseLayer* mul_layer =
params->converter->network()->addElementWise(
*const_cast<nvinfer1::ITensor*>(tensor),
*const_cast<nvinfer1::ITensor*>(const_alpha_tensor),
nvinfer1::ElementWiseOperation::kPROD);
TFTRT_RETURN_ERROR_IF_NULLPTR(mul_layer, node_def.name());
// max(x, alpha * x)
nvinfer1::IElementWiseLayer* max_layer =
params->converter->network()->addElementWise(
*const_cast<nvinfer1::ITensor*>(tensor),
*const_cast<nvinfer1::ITensor*>(mul_layer->getOutput(0)),
nvinfer1::ElementWiseOperation::kMAX);
TFTRT_RETURN_ERROR_IF_NULLPTR(max_layer, node_def.name());
nvinfer1::ITensor* output_tensor = max_layer->getOutput(0);
params->converter->MarkQuantizationRangesAsInferrable(
output_tensor, const_cast<nvinfer1::ITensor*>(mul_layer->getOutput(0)));

params->outputs->push_back(TRT_TensorOrWeights(output_tensor));
return Status::OK();
}

tensorflow::Status ConvertActivation(OpConverterParams* params) {
const auto& inputs = params->inputs;
const auto& node_def = params->node_def;
@@ -2449,9 +2520,9 @@ tensorflow::Status ConvertActivation(OpConverterParams* params) {
};
auto op_pair = ops.find(node_def.op());
if (op_pair == ops.end()) {
return tensorflow::errors::Unimplemented(
"Activation op: ", node_def.op(),
" not supported at: ", node_def.name());
return tensorflow::errors::Unimplemented("Activation op: ", node_def.op(),
" not supported at: ",
node_def.name());
}
if (params->validation_only) return tensorflow::Status::OK();

@@ -2539,8 +2610,7 @@ Status ConvertQuantize(OpConverterParams* params) {
return Status::OK();
}

// TODO(pdavoodi): we should update relu6 implementation once TensorRT supports
// Relu6 natively.
// TODO(tmorris): Use ActivationType::kCLIP in TRT 5.1+ once perf improves.
tensorflow::Status ConvertRelu6(OpConverterParams* params) {
const auto& inputs = params->inputs;
const auto& node_def = params->node_def;
@@ -2576,32 +2646,19 @@ tensorflow::Status ConvertRelu6(OpConverterParams* params) {
params->converter->ProvideQuantizationRange(relu_layer->getOutput(0), 0.0f,
6.0f);

// Create a constant layer to store the floating point weight i.e. 6.0f This
// tensor will be broadcasted uniformly during elementwise `min` operation.
// The constant has to have the same rank as the input in order for TRT to
// broadcast
nvinfer1::Dims dims;
dims.nbDims = relu_layer->getOutput(0)->getDimensions().nbDims;
for (int i = 0; i < dims.nbDims; i++) {
dims.d[i] = 1;
}
TRT_ShapedWeights weights = params->weight_store->GetTempWeights(
tensorflow::DataType::DT_FLOAT, dims);
auto weights_ptr =
static_cast<float*>(const_cast<void*>(weights.GetValues()));
weights_ptr[0] = 6.0f;
nvinfer1::ITensor* const6_tensor =
params->converter->CreateConstantLayer(weights, dims);
TFTRT_RETURN_ERROR_IF_NULLPTR(const6_tensor, node_def.name());
params->converter->ProvideQuantizationRange(const6_tensor, 0.0f, 6.0f);
// Create a constant layer to store the floating point weight i.e. 6.0f
const nvinfer1::ITensor* const6_tensor = nullptr;
TF_RETURN_IF_ERROR(CreateBroadcastableScalarConstant(
params, 6.0f, relu_layer->getOutput(0)->getDimensions(), &const6_tensor));

// ElementWise Min Operation
// Min op is a nop for INT8 execution path, as the input tensor
// to this layer will only have values in range [0.f, 6.0f].
nvinfer1::IElementWiseLayer* relu6_layer =
params->converter->network()->addElementWise(
*const_cast<nvinfer1::ITensor*>(relu_layer->getOutput(0)),
*const6_tensor, nvinfer1::ElementWiseOperation::kMIN);
*const_cast<nvinfer1::ITensor*>(const6_tensor),
nvinfer1::ElementWiseOperation::kMIN);
TFTRT_RETURN_ERROR_IF_NULLPTR(relu6_layer, node_def.name());
nvinfer1::ITensor* output_tensor = relu6_layer->getOutput(0);
params->converter->ProvideQuantizationRange(output_tensor, 0.0f, 6.0f);
@@ -3593,6 +3650,7 @@ static void RegisterValidatableOpConverters(
(*registration)["Conv2D"] = ConvertConv2D;
(*registration)["DepthwiseConv2dNative"] = ConvertConv2DDepthwise;
(*registration)["ExpandDims"] = ConvertExpandDims;
(*registration)["LeakyRelu"] = ConvertLeakyRelu;
(*registration)["MatMul"] = ConvertMatMul;
(*registration)["Pad"] = ConvertPad;
(*registration)["Relu6"] = ConvertRelu6;
68 changes: 23 additions & 45 deletions tensorflow/compiler/tf2tensorrt/convert/convert_nodes_test.cc
@@ -2068,48 +2068,6 @@ TEST_F(OpConverterTest, ConvertQuantize) {
}
}

TEST_F(OpConverterTest, ConvertRelu6) {
{
// Input list is empty, should fail.
NodeDef node_def = MakeNodeDef("my_relu6", "Relu6", {});
RunValidationAndConversion(
node_def, error::INVALID_ARGUMENT,
"Invalid number of inputs for Relu6, at my_relu6");
}

// Get the NodeDef for Relu6.
Scope s = Scope::NewRootScope();
auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
auto relu6 = ops::Relu6(s.WithOpName("my_relu6"), input);
const NodeDef node_def = relu6.operation.node()->def();
{
// Input is weights, should fail.
Reset();
AddTestWeights<float>("input", {1}, {1.0f});
RunValidationAndConversion(
node_def, error::UNIMPLEMENTED,
"Relu6 is only implemented for tensors, not weights, at my_relu6");
}
{
// Clip tensor values and set quantization ranges, ok.
Reset();
AddTestTensor("input", {1, 2, 3});
RunValidationAndConversion(node_def);
TRT_TensorOrWeights output;
TF_EXPECT_OK(GetTensorOrWeights("my_relu6", &output));
EXPECT_TRUE(output.is_tensor());
auto ranges = quantization_ranges();
EXPECT_EQ(ranges[output.tensor()], 6.0f);

const DataVec input_data{
{"input", test::AsTensor<float>({-100, -1, 0, 3, 5, 9})}};
DataVec output_data{{"my_relu6", ConstructTensor<float>(6)}};
BuildAndRun(input_data, &output_data);
EXPECT_THAT(GetSpanForData<float>(output_data[0]),
ElementsAre(0, 0, 0, 3, 5, 6));
}
}

template <DataType dtype>
void TestConvertSquare(OpConverterTest* test) {
test->Reset();
@@ -2189,13 +2147,23 @@ TEST_F(OpConverterTest, ConvertActivation) {
"Relu is only implemented for tensors, at my_act");
}

constexpr float kAlpha = 0.2f;

// Get nodedef for activation layer.
auto get_act_nodedef = [](string op_name) -> NodeDef {
Scope s = Scope::NewRootScope();
auto input = ops::Placeholder(s.WithOpName("input"), DT_FLOAT);
if (op_name == "Relu") {
if (op_name == "LeakyRelu") {
// LeakyRelu does not have a C++ API
NodeDef node_def = MakeNodeDef("my_act", "LeakyRelu", {"input"});
(*node_def.mutable_attr())["alpha"].set_f(kAlpha);
return node_def;
} else if (op_name == "Relu") {
auto act = ops::Relu(s.WithOpName("my_act"), input);
return act.operation.node()->def();
} else if (op_name == "Relu6") {
auto act = ops::Relu6(s.WithOpName("my_act"), input);
return act.operation.node()->def();
} else if (op_name == "Sigmoid") {
auto act = ops::Sigmoid(s.WithOpName("my_act"), input);
return act.operation.node()->def();
@@ -2208,8 +2176,12 @@ TEST_F(OpConverterTest, ConvertActivation) {
};
// Get expected output for activation layer.
auto get_act_output = [](string op_name, float input) -> float {
if (op_name == "Relu") {
if (op_name == "LeakyRelu") {
return (input > 0.0f) ? input : input * kAlpha;
} else if (op_name == "Relu") {
return (input > 0.0f) ? input : 0.0f;
} else if (op_name == "Relu6") {
return std::min(std::max(input, 0.0f), 6.0f);
} else if (op_name == "Sigmoid") {
return 1.0f / (1.0f + std::exp(-input));
} else if (op_name == "Tanh") {
@@ -2220,7 +2192,8 @@ TEST_F(OpConverterTest, ConvertActivation) {
};

// Ok.
for (string op_name : {"Relu", "Sigmoid", "Tanh"}) {
for (const string& op_name :
{"LeakyRelu", "Relu", "Relu6", "Sigmoid", "Tanh"}) {
Reset();
NodeDef node_def = get_act_nodedef(op_name);
AddTestTensor("input", {1, 2, 3});
Expand All @@ -2229,6 +2202,11 @@ TEST_F(OpConverterTest, ConvertActivation) {
TF_EXPECT_OK(GetTensorOrWeights("my_act", &output));
EXPECT_TRUE(output.is_tensor());
ExpectTrtDimsEqualsArray({1, 2, 3}, output.tensor()->getDimensions());
if (op_name == "Relu6") {
// Relu6 should set quantization range automatically.
auto ranges = quantization_ranges();
EXPECT_EQ(ranges[output.tensor()], 6.0f);
}

const std::vector<float> input = {-100, -2, -1, 0, 1, 100};
const DataVec input_data{{"input", test::AsTensor<float>(input)}};
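
The dedicated ConvertRelu6 test is folded into the parameterized ConvertActivation test above, which now also covers LeakyRelu. The expected LeakyRelu outputs for the shared test input are easy to verify by hand; a standalone sketch (not part of the test code, reusing the same kAlpha = 0.2f and input vector the test uses):

```cpp
#include <cstdio>

int main() {
  const float kAlpha = 0.2f;  // matches the alpha the test sets on the LeakyRelu NodeDef
  const float input[] = {-100.0f, -2.0f, -1.0f, 0.0f, 1.0f, 100.0f};  // same as the test input
  for (float x : input) {
    const float y = (x > 0.0f) ? x : x * kAlpha;  // equivalent to max(x, kAlpha * x)
    std::printf("%g -> %g\n", x, y);  // -100 -> -20, -2 -> -0.4, -1 -> -0.2, 0 -> 0, 1 -> 1, 100 -> 100
  }
  return 0;
}
```
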