[bugfix] Fix Quantized Leaky ReLU TFLite inference Ops. #37279

Merged · 5 commits · Mar 9, 2020
71 changes: 36 additions & 35 deletions tensorflow/lite/kernels/activations.cc
@@ -76,9 +76,10 @@ struct LogSoftmaxOpData : public OpData {
 };
 
 struct LeakyReluOpData : public OpData {
-  uint8_t q_alpha;
-  int32_t output_multiplier = 0;
-  int output_shift = 0;
+  int32_t output_multiplier_alpha = 0;
+  int32_t output_shift_alpha = 0;
+  int32_t output_multiplier_identity = 0;
+  int32_t output_shift_identity = 0;
 };
 
 struct PreluOpData : public OpData {
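For reference, the two multiplier/shift pairs fall directly out of the affine quantization scheme. Writing $x = s_{in}(q_{in} - z_{in})$ and $y = s_{out}(q_{out} - z_{out})$ (standard scale/zero-point notation, not taken from the diff itself) and substituting into $y = x$ and $y = \alpha x$ gives

$$q_{out} = z_{out} + \frac{s_{in}}{s_{out}}(q_{in} - z_{in}) \quad (x \ge 0), \qquad q_{out} = z_{out} + \frac{\alpha\, s_{in}}{s_{out}}(q_{in} - z_{in}) \quad (x < 0),$$

so the ratios $s_{in}/s_{out}$ and $\alpha\, s_{in}/s_{out}$ are exactly the "identity" and "alpha" multipliers this struct now stores in fixed-point form. The old scheme instead quantized alpha itself to a uint8 with the input's scale and zero point, losing precision and requiring the alpha_offset bookkeeping that this PR removes.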
@@ -363,20 +364,17 @@ TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
 
   LeakyReluOpData* data = reinterpret_cast<LeakyReluOpData*>(node->user_data);
 
-  if (output->type == kTfLiteUInt8) {
+  if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
     const auto* params =
         reinterpret_cast<TfLiteLeakyReluParams*>(node->builtin_data);
-    // Quantize the alpha with same zero-point and scale as of input
-    data->q_alpha = static_cast<uint8_t>(std::max<float>(
-        std::numeric_limits<uint8_t>::min(),
-        std::min<float>(std::numeric_limits<uint8_t>::max(),
-                        std::round(input->params.zero_point +
-                                   (params->alpha / input->params.scale)))));
-
-    double real_multiplier =
-        input->params.scale * input->params.scale / output->params.scale;
-    QuantizeMultiplierSmallerThanOneExp(
-        real_multiplier, &data->output_multiplier, &data->output_shift);
+    double alpha_multiplier =
+        input->params.scale * params->alpha / output->params.scale;
+    QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha,
+                       &data->output_shift_alpha);
+    double identity_multiplier = input->params.scale / output->params.scale;
+    QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity,
+                       &data->output_shift_identity);
   }
   return context->ResizeTensor(context, output,
                                TfLiteIntArrayCopy(input->dims));
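QuantizeMultiplier converts each real-valued ratio above into a 32-bit fixed-point multiplier plus a power-of-two shift, so the inner loop runs without floating point. A minimal sketch of that decomposition, assuming it mirrors the behavior of TFLite's QuantizeMultiplier (the function name below is illustrative):

#include <cmath>
#include <cstdint>

// Decompose `real` so that real ≈ quantized * 2^(shift - 31); for positive
// inputs `quantized` is a Q31 value in [2^30, 2^31). Sketch only.
void DecomposeMultiplier(double real, int32_t* quantized, int* shift) {
  if (real == 0.0) {
    *quantized = 0;
    *shift = 0;
    return;
  }
  const double fraction = std::frexp(real, shift);  // real = fraction * 2^shift
  int64_t q = static_cast<int64_t>(std::round(fraction * (1ll << 31)));
  if (q == (1ll << 31)) {  // rounding carried past the Q31 range
    q /= 2;
    ++*shift;
  }
  *quantized = static_cast<int32_t>(q);
}

For example, an alpha multiplier of 0.1 decomposes as 0.1 = 0.8 * 2^-3, giving quantized ≈ 1717986918 (0.8 in Q31) and shift = -3.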
@@ -1087,24 +1085,6 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
   }
 }
 
-namespace {
-template <typename T>
-void QLeakyRelu(const TfLiteTensor* input, TfLiteTensor* output, float alpha,
-                const LeakyReluOpData* data) {
-  LeakyReluParams op_params;
-  op_params.input_offset = input->params.zero_point;
-  op_params.alpha_offset = input->params.zero_point;
-  op_params.output_offset = output->params.zero_point;
-
-  op_params.output_multiplier = data->output_multiplier;
-  op_params.output_shift = data->output_shift;
-
-  reference_ops::QuantizeLeakyRelu(
-      op_params, data->q_alpha, GetTensorShape(input), GetTensorData<T>(input),
-      GetTensorShape(output), GetTensorData<T>(output));
-}
-}  // namespace
-
 TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, 0);
   TfLiteTensor* output = GetOutput(context, node, 0);
@@ -1114,21 +1094,42 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
       reinterpret_cast<LeakyReluOpData*>(node->user_data);
 
   LeakyReluParams op_params;
-  op_params.alpha = params->alpha;
   switch (input->type) {
     case kTfLiteFloat32: {
+      op_params.alpha = params->alpha;
       optimized_ops::LeakyRelu(
           op_params, GetTensorShape(input), GetTensorData<float>(input),
           GetTensorShape(output), GetTensorData<float>(output));
       return kTfLiteOk;
     } break;
     case kTfLiteUInt8: {
-      QLeakyRelu<uint8_t>(input, output, params->alpha, data);
+      op_params.input_offset = input->params.zero_point;
+      op_params.output_offset = output->params.zero_point;
+      op_params.output_multiplier_alpha = data->output_multiplier_alpha;
+      op_params.output_shift_alpha = data->output_shift_alpha;
+      op_params.output_multiplier_identity = data->output_multiplier_identity;
+      op_params.output_shift_identity = data->output_shift_identity;
+      reference_ops::QuantizeLeakyRelu(
+          op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+          GetTensorShape(output), GetTensorData<uint8_t>(output));
       return kTfLiteOk;
     } break;
+    case kTfLiteInt8: {
+      op_params.input_offset = input->params.zero_point;
+      op_params.output_offset = output->params.zero_point;
+      op_params.output_multiplier_alpha = data->output_multiplier_alpha;
+      op_params.output_shift_alpha = data->output_shift_alpha;
+      op_params.output_multiplier_identity = data->output_multiplier_identity;
+      op_params.output_shift_identity = data->output_shift_identity;
+      reference_ops::QuantizeLeakyRelu(
+          op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
+          GetTensorShape(output), GetTensorData<int8_t>(output));
+      return kTfLiteOk;
+    } break;
     default:
       context->ReportError(
-          context, "Only float32 and uint8 is supported currently, got %s.",
+          context,
+          "Only float32, int8 and uint8 is supported currently, got %s.",
           TfLiteTypeGetName(input->type));
       return kTfLiteError;
   }
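The uint8 and int8 arms above are identical except for the element type. A hypothetical helper, not part of this PR, that would fold the duplication:

// Sketch: copies the precomputed fixed-point parameters and dispatches to
// the reference kernel for element type T.
template <typename T>
void QuantizedLeakyRelu(const TfLiteTensor* input, TfLiteTensor* output,
                        const LeakyReluOpData* data) {
  LeakyReluParams op_params;
  op_params.input_offset = input->params.zero_point;
  op_params.output_offset = output->params.zero_point;
  op_params.output_multiplier_alpha = data->output_multiplier_alpha;
  op_params.output_shift_alpha = data->output_shift_alpha;
  op_params.output_multiplier_identity = data->output_multiplier_identity;
  op_params.output_shift_identity = data->output_shift_identity;
  reference_ops::QuantizeLeakyRelu(
      op_params, GetTensorShape(input), GetTensorData<T>(input),
      GetTensorShape(output), GetTensorData<T>(output));
}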
46 changes: 33 additions & 13 deletions tensorflow/lite/kernels/activations_test.cc
@@ -95,10 +95,11 @@ class BaseActivationsOpModel : public SingleOpModel {
   // A dedicated constructor for LeakyRelu, which does some options.
   BaseActivationsOpModel(TensorData input, float alpha) {
     input_ = AddInput(input);
-    if (input.type == TensorType_UINT8) {
-      output_ = AddOutput({input.type, {}, input.min, input.max});
-    } else if (input.type == TensorType_INT8) {
-      output_ = AddOutput({TensorType_INT8, {}, input.min, input.max});
+    // The output scale and input scale might be different.
+    if (input.type == TensorType_UINT8 || input.type == TensorType_INT8) {
+      auto output_min = (input.min >= 0) ? input.min : input.min * alpha;
+      auto output_max = (input.max >= 0) ? input.max : input.max * alpha;
+      output_ = AddOutput({input.type, {}, output_min, output_max});
     } else {
       output_ = AddOutput({input.type, {}});
     }
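A worked instance of that range computation, using the values from the LeakyReluInt8 test added below:

// With input range [-5, 5 * 127/128] and alpha = 0.1, only the negative
// bound shrinks; the positive bound passes through unchanged.
const float alpha = 0.1f;
const float input_min = -5.0f;
const float input_max = 5.0f * 127.f / 128.f;  // ≈ 4.961
const float output_min = input_min * alpha;    // input_min < 0, so -0.5
const float output_max = input_max;            // input_max >= 0, unchanged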
@@ -488,15 +489,34 @@ TEST(QuantizedActivationsOpTest, LeakyReluUint8) {
                      0.0f, 1.0f, 3.0f,    // Row 1
                      1.0f, -0.5f, -1.0f,  // Row 2
                  },
-                 kQuantizedTolerance)));
-  EXPECT_THAT(m.GetOutput<uint8_t>(), ElementsAreArray({
-                                          128,
-                                          144,
-                                          176,
-                                          144,
-                                          120,
-                                          112,
-                                      }));
+                 kQuantizedTolerance * 8)));
 }
 
+TEST(QuantizedActivationsOpTest, LeakyReluInt8) {
+  const float kMin = -1;
+  const float kMax = 127.f / 128.f;
+
+  QuantizedActivationsOpModel m(
+      /*input=*/{TensorType_INT8, {5, 5}, 5 * kMin, 5 * kMax}, 0.1);
+
+  m.SetInput<int8_t>({
+      -5.0f, -4.6f, -4.2f, -3.8f, -3.4f,  // Row 1
+      -3.0f, -2.6f, -2.2f, -1.8f, -1.4f,  // Row 2
+      -1.0f, -0.6f, -0.2f, 0.2f, 0.6f,    // Row 3
+      1.0f, 1.4f, 1.8f, 2.2f, 2.6f,       // Row 4
+      3.0f, 3.4f, 3.8f, 4.2f, 4.6f,       // Row 5
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<int8_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      -0.50f, -0.46f, -0.42f, -0.38f, -0.34f,  // Row 1
+                      -0.30f, -0.26f, -0.22f, -0.18f, -0.14f,  // Row 2
+                      -0.10f, -0.06f, -0.02f, 0.20f, 0.60f,    // Row 3
+                      1.00f, 1.40f, 1.80f, 2.20f, 2.60f,       // Row 4
+                      3.00f, 3.40f, 3.80f, 4.20f, 4.60f,       // Row 5
+                  },
+                  kQuantizedTolerance * 5)));
+}
+
 TEST(QuantizedActivationsOpTest, Relu1Int8) {
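For reference, GetDequantizedOutput maps the raw int8 values back to floats with the standard affine scheme; a checking sketch (the helper name is illustrative):

#include <cstdint>

// Affine dequantization. For the test's input range [-5, 5 * 127/128] over
// int8, scale = 9.961 / 255 ≈ 0.0391 and zero_point = 0, so a raw value of
// -13 dequantizes to about -0.508, within kQuantizedTolerance * 5 of the
// expected -0.5.
float Dequantize(int8_t q, float scale, int32_t zero_point) {
  return scale * (q - zero_point);
}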
28 changes: 15 additions & 13 deletions tensorflow/lite/kernels/internal/reference/reference_ops.h
@@ -26,7 +26,6 @@ limitations under the License.
 #include <memory>
 #include <type_traits>
 
-#include "third_party/eigen3/Eigen/Core"
 #include "fixedpoint/fixedpoint.h"
 #include "tensorflow/lite/c/common.h"
 #include "tensorflow/lite/experimental/ruy/profiler/instrumentation.h"
@@ -60,6 +59,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/strided_slice_logic.h"
 #include "tensorflow/lite/kernels/internal/tensor.h"
 #include "tensorflow/lite/kernels/internal/types.h"
+#include "third_party/eigen3/Eigen/Core"
 
 namespace tflite {

@@ -265,30 +265,32 @@ inline void LeakyRelu(const tflite::LeakyReluParams& params,
 }
 
 template <typename T>
-inline void QuantizeLeakyRelu(const LeakyReluParams& params, T q_alpha,
+inline void QuantizeLeakyRelu(const LeakyReluParams& params,
                               const RuntimeShape& input_shape,
                               const T* input_data,
                               const RuntimeShape& output_shape,
                               T* output_data) {
-  ruy::profiler::ScopeLabel label("LeakyRelu (not fused)");
+  ruy::profiler::ScopeLabel label("Quantized LeakyRelu (not fused)");
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
   static const int32 quantized_min = std::numeric_limits<T>::min();
   static const int32 quantized_max = std::numeric_limits<T>::max();
-  static const int32 alpha_value = q_alpha - params.alpha_offset;
   for (int i = 0; i < flat_size; ++i) {
     const int32 input_value = input_data[i] - params.input_offset;
+    int32 unclamped_output;
     if (input_value >= 0) {
-      output_data[i] = input_data[i];
+      unclamped_output = params.output_offset +
+                         MultiplyByQuantizedMultiplier(
+                             input_value, params.output_multiplier_identity,
+                             params.output_shift_identity);
     } else {
-      const int32 unclamped_output =
-          params.output_offset + MultiplyByQuantizedMultiplierSmallerThanOneExp(
-                                     input_value * alpha_value,
-                                     params.output_multiplier,
-                                     params.output_shift);
-      const T clamped_output =
-          std::min(quantized_max, std::max(quantized_min, unclamped_output));
-      output_data[i] = static_cast<uint8>(clamped_output);
+      unclamped_output = params.output_offset +
+                         MultiplyByQuantizedMultiplier(
+                             input_value, params.output_multiplier_alpha,
+                             params.output_shift_alpha);
     }
+    const T clamped_output =
+        std::min(quantized_max, std::max(quantized_min, unclamped_output));
+    output_data[i] = static_cast<T>(clamped_output);
   }
 }
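To see what the fixed-point loop above computes per element, here is a float-reference sketch: a checking aid under the same affine scheme, not the production path.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Reference for one element of the kernel above, using real arithmetic in
// place of the MultiplyByQuantizedMultiplier fixed-point path.
int8_t LeakyReluRef(int8_t q_in, float in_scale, int32_t in_zp,
                    float out_scale, int32_t out_zp, float alpha) {
  const float x = in_scale * (q_in - in_zp);    // dequantize
  const float y = x >= 0 ? x : alpha * x;       // leaky relu in float
  const int32_t q = out_zp + static_cast<int32_t>(std::round(y / out_scale));
  return static_cast<int8_t>(std::min(127, std::max(-128, q)));  // clamp
}

For instance, with equal input and output scales and alpha = 0.1, a negative offset value of -128 yields round(-128 * 0.1) = -13 before the output offset and clamp, matching the fixed-point path's MultiplyByQuantizedMultiplier(-128, 1717986918, -3).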
7 changes: 4 additions & 3 deletions tensorflow/lite/kernels/internal/types.h
@@ -1082,10 +1082,11 @@ struct UnpackParams {
 struct LeakyReluParams {
   float alpha;
   int32 input_offset;
-  int32 alpha_offset;
   int32 output_offset;
-  int32 output_multiplier;
-  int output_shift;
+  int32 output_multiplier_alpha;
+  int32 output_shift_alpha;
+  int32 output_multiplier_identity;
+  int32 output_shift_identity;
 };
 
 template <typename P>
4 changes: 3 additions & 1 deletion tensorflow/lite/kernels/register.cc
@@ -257,7 +257,9 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_ZEROS_LIKE, Register_ZEROS_LIKE());
   AddBuiltin(BuiltinOperator_FLOOR_MOD, Register_FLOOR_MOD());
   AddBuiltin(BuiltinOperator_RANGE, Register_RANGE());
-  AddBuiltin(BuiltinOperator_LEAKY_RELU, Register_LEAKY_RELU());
+  AddBuiltin(BuiltinOperator_LEAKY_RELU, Register_LEAKY_RELU(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_SQUARED_DIFFERENCE, Register_SQUARED_DIFFERENCE());
   AddBuiltin(BuiltinOperator_FILL, Register_FILL());
   AddBuiltin(BuiltinOperator_MIRROR_PAD, Register_MIRROR_PAD());
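Registering the op with min_version 1 and max_version 2 lets the single registration serve both the original float/uint8 kernels and the new int8 path. Assuming the standard OpResolver lookup interface, a model whose LEAKY_RELU op was written at version 2 resolves like so:

// Sketch: lookup for the int8-capable version of the op; a version-1
// (float/uint8) model resolves against the same registration.
const TfLiteRegistration* reg =
    resolver.FindOp(::tflite::BuiltinOperator_LEAKY_RELU, /*version=*/2);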