Added support for 8-bit Quantization for LeakyRelu #27061

Merged
merged 1 commit on Apr 30, 2019
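For orientation before the per-file hunks, a sketch of the math this change implements (notation mine, not taken from the PR itself): LeakyReLU keeps non-negative values and scales negative ones by alpha. With 8-bit affine quantization a real value r is stored as r = s(q - z), and since this PR quantizes alpha using the input's own scale and zero point, the negative branch reduces to one integer multiply plus a fixed-point rescale:

\[ y = \begin{cases} x, & x \ge 0 \\ \alpha x, & x < 0 \end{cases} \qquad q_y = z_{\text{out}} + \frac{s_{\text{in}}^2}{s_{\text{out}}}\,(q_x - z_{\text{in}})(q_\alpha - z_{\text{in}}) \quad \text{for } q_x < z_{\text{in}} \]

The ratio \( s_{\text{in}}^2 / s_{\text{out}} \) is the "real multiplier" that the kernel's Prepare step converts into an integer multiplier and shift below.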
78 changes: 72 additions & 6 deletions tensorflow/lite/kernels/activations.cc
@@ -54,6 +54,12 @@ struct LogSoftmaxOpData : public OpData {
int32_t reverse_scaling_right_shift = 0;
};

struct LeakyReluOpData : public OpData {
uint8_t q_alpha;
int32_t output_multiplier = 0;
int output_shift = 0;
};

struct PreluOpData : public OpData {
int32_t output_multiplier = 0;
int output_shift = 0;
@@ -112,6 +118,42 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
TfLiteIntArrayCopy(input->dims));
}

void* LeakyReluInit(TfLiteContext* context, const char* buffer, size_t length) {
return new LeakyReluOpData;
}

void LeakyReluFree(TfLiteContext* context, void* buffer) {
delete reinterpret_cast<LeakyReluOpData*>(buffer);
}

TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
TF_LITE_ENSURE_EQ(context, input->type, output->type);

LeakyReluOpData* data = reinterpret_cast<LeakyReluOpData*>(node->user_data);

if (output->type == kTfLiteUInt8) {
const auto* params =
reinterpret_cast<TfLiteLeakyReluParams*>(node->builtin_data);
// Quantize alpha with the same zero point and scale as the input.
data->q_alpha = static_cast<uint8_t>(std::max<float>(
std::numeric_limits<uint8_t>::min(),
std::min<float>(std::numeric_limits<uint8_t>::max(),
std::round(input->params.zero_point +
(params->alpha / input->params.scale)))));

double real_multiplier =
input->params.scale * input->params.scale / output->params.scale;
QuantizeMultiplierSmallerThanOneExp(
real_multiplier, &data->output_multiplier, &data->output_shift);
}
return context->ResizeTensor(context, output,
TfLiteIntArrayCopy(input->dims));
}
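A quick sanity check on the Prepare logic above, plugging in the configuration used by the new unit test later in this PR (input scale 0.0625, zero point 128, alpha 0.5, identical input and output quantization; those numbers come from the test, not from this hunk):

\( q_\alpha = z_{\text{in}} + \mathrm{round}(\alpha / s_{\text{in}}) = 128 + \mathrm{round}(0.5 / 0.0625) = 136 \)
\( M = s_{\text{in}}^2 / s_{\text{out}} = (0.0625 \times 0.0625) / 0.0625 = 0.0625 \)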

TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
OpData* data = reinterpret_cast<OpData*>(node->user_data);

@@ -925,11 +967,31 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
}
}

namespace {
template <typename T>
void QLeakyRelu(const TfLiteTensor* input, TfLiteTensor* output,
const LeakyReluOpData* data) {
LeakyReluParams op_params;
op_params.input_offset = input->params.zero_point;
op_params.alpha_offset = input->params.zero_point;
op_params.output_offset = output->params.zero_point;

op_params.output_multiplier = data->output_multiplier;
op_params.output_shift = data->output_shift;

reference_ops::QuantizeLeakyRelu(
op_params, data->q_alpha, GetTensorShape(input), GetTensorData<T>(input),
GetTensorShape(output), GetTensorData<T>(output));
}
} // namespace

TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);
const auto* params =
reinterpret_cast<TfLiteLeakyReluParams*>(node->builtin_data);
const LeakyReluOpData* data =
reinterpret_cast<LeakyReluOpData*>(node->user_data);

LeakyReluParams op_params;
op_params.alpha = params->alpha;
@@ -940,10 +1002,14 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
GetTensorShape(output), GetTensorData<float>(output));
return kTfLiteOk;
} break;
case kTfLiteUInt8: {
QLeakyRelu<uint8_t>(input, output, data);
return kTfLiteOk;
} break;
default:
-      context->ReportError(context,
-                           "Only float32 is supported currently, got %s.",
-                           TfLiteTypeGetName(input->type));
+      context->ReportError(
+          context, "Only float32 and uint8 are supported currently, got %s.",
+          TfLiteTypeGetName(input->type));
return kTfLiteError;
}
}
@@ -1054,9 +1120,9 @@ TfLiteRegistration* Register_PRELU() {
}

TfLiteRegistration* Register_LEAKY_RELU() {
-  static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr,
-                                 activations::GenericPrepare,
-                                 activations::LeakyReluEval};
+  static TfLiteRegistration r = {
+      activations::LeakyReluInit, activations::LeakyReluFree,
+      activations::LeakyReluPrepare, activations::LeakyReluEval};
return &r;
}
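For readers new to TF Lite kernels, here is the same registration annotated with when each callback fires; the comments are mine, and the field order {init, free, prepare, invoke} matches TfLiteRegistration:

static TfLiteRegistration r = {
    activations::LeakyReluInit,     // once per node: allocates LeakyReluOpData
    activations::LeakyReluFree,     // deletes that per-node state
    activations::LeakyReluPrepare,  // on tensor (re)allocation: quantizes alpha, derives multiplier/shift
    activations::LeakyReluEval};    // on every Invoke(): dispatches the float or uint8 path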

44 changes: 43 additions & 1 deletion tensorflow/lite/kernels/activations_test.cc
@@ -57,6 +57,21 @@ class BaseActivationsOpModel : public SingleOpModel {
BuildInterpreter({GetShape(input_)});
}

// A dedicated constructor for LeakyRelu, which sets up the alpha option.
BaseActivationsOpModel(TensorData input, float alpha) {
input_ = AddInput(input);
if (input.type == TensorType_UINT8) {
output_ = AddOutput({input.type, {}, input.min, input.max});
} else if (input.type == TensorType_INT8) {
output_ = AddOutput({TensorType_INT8, {}, input.min, input.max});
} else {
output_ = AddOutput({input.type, {}});
}
SetBuiltinOp(BuiltinOperator_LEAKY_RELU, BuiltinOptions_LeakyReluOptions,
CreateLeakyReluOptions(builder_, alpha).Union());
BuildInterpreter({GetShape(input_)});
}

BaseActivationsOpModel(BuiltinOperator type, const TensorData& input,
const TensorData& output) {
input_ = AddInput(input);
@@ -112,6 +127,7 @@ class QuantizedActivationsOpModel : public BaseActivationsOpModel {
std::vector<T> GetOutput() {
return ExtractVector<T>(output_);
}

template <typename T>
std::vector<float> GetDequantizedOutput() {
return Dequantize<T>(ExtractVector<T>(output_), GetScale(output_),
@@ -212,6 +228,33 @@ TEST(QuantizedActivationsOpTest, Relu6Uint8) {
ElementsAreArray({128, 128, 160, 192, 176, 128, 224, 144}));
}

TEST(QuantizedActivationsOpTest, LeakyReluUint8) {
const float kMin = -1;
const float kMax = 127.f / 128.f;
QuantizedActivationsOpModel m(
/*input=*/{TensorType_UINT8, {2, 3}, 8 * kMin, 8 * kMax}, 0.5);

m.SetInput<uint8_t>({
0.0f, 1.0f, 3.0f, // Row 1
1.0f, -1.0f, -2.0f, // Row 2
});
m.Invoke();
EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
ElementsAreArray(ArrayFloatNear(
{
0.0f, 1.0f, 3.0f, // Row 1
1.0f, -0.5f, -1.0f, // Row 2
},
kQuantizedTolerance)));
EXPECT_THAT(m.GetOutput<uint8_t>(), ElementsAreArray({
128,
144,
176,
144,
120,
112,
}));
}
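The expected uint8 values in this test can be reproduced by hand, assuming the usual test-harness convention of deriving scale and zero point from the given [min, max] range:

\( s = \frac{8 \cdot 127/128 + 8}{255} = \frac{15.9375}{255} = 0.0625, \qquad z = 128 \)
\( q(v) = z + v/s: \quad q(0)=128,\ q(1)=144,\ q(3)=176,\ q(-0.5)=120,\ q(-1)=112 \)

which matches the ElementsAreArray expectation above.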
TEST(QuantizedActivationsOpTest, Relu6Int8) {
const float kMin = -1;
const float kMax = 127.f / 128.f;
@@ -983,7 +1026,6 @@ TEST(FloatActivationsOpTest, LeakyRelu) {
1.0f, -0.5f, -1.0f, // Row 2
}));
}

} // namespace
} // namespace tflite

28 changes: 28 additions & 0 deletions tensorflow/lite/kernels/internal/reference/reference_ops.h
@@ -318,6 +318,34 @@ inline void LeakyRelu(const tflite::LeakyReluParams& params,
}
}

template <typename T>
inline void QuantizeLeakyRelu(const LeakyReluParams& params, T q_alpha,
const RuntimeShape& input_shape,
const T* input_data,
const RuntimeShape& output_shape,
T* output_data) {
gemmlowp::ScopedProfilingLabel label("LeakyRelu (not fused)");
const int flat_size = MatchingFlatSize(input_shape, output_shape);
const int32 quantized_min = std::numeric_limits<T>::min();
const int32 quantized_max = std::numeric_limits<T>::max();
// Not static: q_alpha and params.alpha_offset vary from call to call.
const int32 alpha_value = q_alpha - params.alpha_offset;
for (int i = 0; i < flat_size; ++i) {
const int32 input_value = input_data[i] - params.input_offset;
if (input_value >= 0) {
output_data[i] = input_data[i];
} else {
const int32 unclamped_output =
params.output_offset + MultiplyByQuantizedMultiplierSmallerThanOneExp(
input_value * alpha_value,
params.output_multiplier,
params.output_shift);
const int32 clamped_output =
std::min(quantized_max, std::max(quantized_min, unclamped_output));
// Cast back through the template type so int8 instantiations also work.
output_data[i] = static_cast<T>(clamped_output);
}
}
}
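The output_multiplier and output_shift consumed here come from QuantizeMultiplierSmallerThanOneExp in the Prepare step. A minimal sketch of that decomposition, written for illustration rather than copied from the TF Lite sources, could look like this:

#include <cmath>
#include <cstdint>

// Sketch: split a real multiplier in (0, 1) into a Q31 fixed-point value
// plus a base-2 exponent, so that real ~= (multiplier / 2^31) * 2^shift.
void QuantizeMultiplierSketch(double real_multiplier,
                              int32_t* quantized_multiplier, int* shift) {
  // std::frexp returns q in [0.5, 1) with real_multiplier == q * 2^(*shift).
  const double q = std::frexp(real_multiplier, shift);
  int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
  if (q_fixed == (1LL << 31)) {  // Rounding can push q up to exactly 1.0.
    q_fixed /= 2;
    ++(*shift);
  }
  *quantized_multiplier = static_cast<int32_t>(q_fixed);
}

For the 0.0625 multiplier in this PR's test configuration this gives quantized_multiplier = 1 << 30 with shift = -3; MultiplyByQuantizedMultiplierSmallerThanOneExp then applies it as a saturating high multiply followed by a rounding right shift.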

inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
const RuntimeShape& input_shape,
const float* input_data,
5 changes: 5 additions & 0 deletions tensorflow/lite/kernels/internal/types.h
@@ -1022,6 +1022,11 @@ struct UnpackParams {

struct LeakyReluParams {
float alpha;
int32 input_offset;
int32 alpha_offset;
int32 output_offset;
int32 output_multiplier;
int output_shift;
};

template <typename P>
5 changes: 3 additions & 2 deletions tensorflow/lite/toco/graph_transformations/quantize.cc
@@ -62,8 +62,9 @@ bool SupportsQuantization(const Operator& op) {
type == OperatorType::kLessEqual || type == OperatorType::kSelect ||
type == OperatorType::kArgMax || type == OperatorType::kRelu ||
type == OperatorType::kRelu1 || type == OperatorType::kRelu6 ||
-         type == OperatorType::kShape || type == OperatorType::kExpandDims ||
-         type == OperatorType::kPack || type == OperatorType::kTopK_V2 ||
+         type == OperatorType::kLeakyRelu || type == OperatorType::kShape ||
+         type == OperatorType::kExpandDims || type == OperatorType::kPack ||
+         type == OperatorType::kTopK_V2 ||
type == OperatorType::kRandomUniform ||
type == OperatorType::kResizeNearestNeighbor ||
type == OperatorType::kPRelu || type == OperatorType::kReduceMax ||