Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support quantized int8 and uint8 in TFLu mean operator #38634

Merged
merged 12 commits into from
Aug 30, 2020
2 changes: 1 addition & 1 deletion tensorflow/lite/kernels/internal/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ cc_library(
"reference/integer_ops/fully_connected.h",
"reference/integer_ops/l2normalization.h",
"reference/integer_ops/logistic.h",
"reference/integer_ops/mean.h",
"reference/integer_ops/mul.h",
"reference/integer_ops/pooling.h",
"reference/integer_ops/tanh.h",
Expand Down Expand Up @@ -487,7 +488,6 @@ cc_library(
"//conditions:default": [
"reference/integer_ops/dequantize.h",
"reference/integer_ops/log_softmax.h",
"reference/integer_ops/mean.h",
"reference/integer_ops/transpose_conv.h",
"reference/reference_ops.h",
"reference/sparse_ops/fully_connected.h",
Expand Down
6 changes: 3 additions & 3 deletions tensorflow/lite/kernels/internal/reference/integer_ops/mean.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ namespace reference_integer_ops {
template <typename integer_type>
inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,
int32_t shift, const RuntimeShape& unextended_input_shape,
const integer_type* input_data, int32 input_zero_point,
const integer_type* input_data, int32_t input_zero_point,
const RuntimeShape& unextended_output_shape,
integer_type* output_data, int32 output_zero_point) {
integer_type* output_data, int32_t output_zero_point) {
// Current implementation only supports dimension equals 4 and simultaneous
// reduction over width and height.
TFLITE_CHECK_EQ(unextended_input_shape.DimensionsCount(), 4);
Expand Down Expand Up @@ -53,7 +53,7 @@ inline void Mean(const tflite::MeanParams& op_params, int32_t multiplier,

for (int out_b = 0; out_b < output_batch; ++out_b) {
for (int out_d = 0; out_d < output_depth; ++out_d) {
int32 acc = 0;
int32_t acc = 0;
for (int in_h = 0; in_h < input_height; ++in_h) {
for (int in_w = 0; in_w < input_width; ++in_w) {
acc += input_data[Offset(input_shape, out_b, in_h, in_w, out_d)] -
Expand Down
8 changes: 4 additions & 4 deletions tensorflow/lite/kernels/internal/reference/reduce.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,11 @@ inline bool Mean(const T* input_data, const int* input_dims,
}

// Calculate mean by dividing output_data by num of aggregated element.
U num_elements_in_axis = 1;
size_t num_elements_in_axis = 1;
for (int idx = 0; idx < num_resolved_axis; ++idx) {
size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
// Overflow prevention.
if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
return false;
}
num_elements_in_axis *= current;
Expand Down Expand Up @@ -359,11 +359,11 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32_t input_zero_point,
}

// Calculate mean by dividing output_data by num of aggregated element.
U num_elements_in_axis = 1;
size_t num_elements_in_axis = 1;
patriklaurell marked this conversation as resolved.
Show resolved Hide resolved
patriklaurell marked this conversation as resolved.
Show resolved Hide resolved
for (int idx = 0; idx < num_resolved_axis; ++idx) {
size_t current = static_cast<size_t>(input_dims[resolved_axis[idx]]);
// Overflow prevention.
if (current > (std::numeric_limits<U>::max() / num_elements_in_axis)) {
if (current > (std::numeric_limits<size_t>::max() / num_elements_in_axis)) {
return false;
}
num_elements_in_axis *= current;
Expand Down
134 changes: 119 additions & 15 deletions tensorflow/lite/micro/kernels/reduce.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ limitations under the License.
#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
patriklaurell marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tensorflow/lite/micro/kernels/reduce.cc:21:10: fatal error: '/tensorflow/lite/kernels/internal/reference/integer_ops/mean.h' file not found
#include "tensorflow/lite/kernels/internal/reference/integer_ops/mean.h"
         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1 error generated.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What command is run to reproduce this error? @rthadur

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is still failing internally; can you please check?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Running `bazel build //tensorflow/lite/micro/benchmarks:conv_benchmark` locally, I do not encounter the problem. Could you provide more detailed instructions for how to reproduce this error? @rthadur

#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/internal/types.h"
#include "tensorflow/lite/kernels/kernel_util.h"
Expand All @@ -32,6 +33,20 @@ namespace reduce {
constexpr int kMaxNumberOfAxis = 4;
constexpr int kMaxNumberOfReducedAxis = 2;

// Per-node state for the MEAN kernel. The storage is requested in InitMean(),
// the fields are written by PrepareMeanOrSum(), and EvalMean() reads them back
// through node->user_data.
struct OpData {
// Fixed-point multiplier produced by QuantizeMultiplier() from
// input scale / output scale. Only written when the input tensor is int8.
int32_t multiplier;
// Shift that accompanies `multiplier`; written by the same
// QuantizeMultiplier() call, so also only valid for int8 input.
int shift;
// Index filled in by RequestScratchBufferInArena() for a scratch buffer of
// NumElements(output) * sizeof(int32_t) bytes; EvalMean() passes it to
// GetScratchBuffer(). Only requested for int8 / uint8 input.
int temp_buffer_idx;
// Copy of input->params.zero_point (int8 / uint8 input only).
int input_zp;
// Copy of input->params.scale (int8 / uint8 input only).
float input_scale;
// Copy of output->params.zero_point (int8 / uint8 input only).
int output_zp;
// Copy of output->params.scale (int8 / uint8 input only).
float output_scale;
};

// `init` callback registered for the MEAN op in Register_MEAN().
//
// Asks the context for a persistent buffer large enough to hold one OpData
// and returns that pointer unchanged. No OpData fields are written here;
// PrepareMeanOrSum() fills them in later.
//
// `buffer` and `length` are part of the callback signature but are not used.
void* InitMean(TfLiteContext* context, const char* buffer, size_t length) {
  constexpr size_t kOpDataBytes = sizeof(OpData);
  void* const op_data_storage =
      context->AllocatePersistentBuffer(context, kOpDataBytes);
  return op_data_storage;
}

TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
// Inputs Tensor (dtype depends on quantization):
// [0] = Input
Expand All @@ -51,6 +66,25 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
}

TfLiteStatus PrepareMeanOrSum(TfLiteContext* context, TfLiteNode* node) {
const TfLiteTensor* input = GetInput(context, node, 0);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
const TfLiteTensor* output = GetOutput(context, node, 0);
if (input->type == kTfLiteInt8) {
const double real_multiplier = static_cast<double>(input->params.scale) /
static_cast<double>(output->params.scale);
QuantizeMultiplier(real_multiplier, &op_data->multiplier, &op_data->shift);
}

int output_size = NumElements(output);
if (input->type == kTfLiteInt8 || input->type == kTfLiteUInt8) {
context->RequestScratchBufferInArena(context, output_size * sizeof(int32_t),
&op_data->temp_buffer_idx);
op_data->input_zp = input->params.zero_point;
op_data->input_scale = input->params.scale;
op_data->output_zp = output->params.zero_point;
op_data->output_scale = output->params.scale;
}

TF_LITE_ENSURE_OK(context, PrepareSimple(context, node));
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
return kTfLiteOk;
Expand All @@ -74,26 +108,25 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);
TfLiteReducerParams* params =
reinterpret_cast<TfLiteReducerParams*>(node->builtin_data);
OpData* op_data = reinterpret_cast<OpData*>(node->user_data);

int num_axis = static_cast<int>(ElementCount(*axis->dims));
int temp_index[kMaxNumberOfAxis];
int resolved_axis[kMaxNumberOfReducedAxis];

tflite::MeanParams op_params;
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis, &op_params);
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
// scratch tensor allocation has been implemented in (b/132070898)
bool is_valid_inputs = (input->dims->size == 4 && op_params.axis_count == 2 &&
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
TF_LITE_ENSURE_MSG(
context, is_valid_inputs == true,
"Number of Input "
"dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
switch (input->type) {
case kTfLiteFloat32: {
tflite::MeanParams op_params;
ResolveAxis(tflite::micro::GetTensorData<int>(axis), num_axis,
&op_params);
// TODO(b/146571391): Support only 4D Input and 2D Axis for Mean until
// scratch tensor allocation has been implemented in (b/132070898)
bool is_valid_inputs =
(input->dims->size == 4 && op_params.axis_count == 2 &&
((op_params.axis[0] == 1 && op_params.axis[1] == 2) ||
(op_params.axis[0] == 2 && op_params.axis[1] == 1)));
TF_LITE_ENSURE_MSG(
context, is_valid_inputs == true,
"Number of Input "
"dimensions != 4 OR the Axis is not either [1, 2] or [2, 1]");
// TODO(b/139102329): Handle the below special case in the combined
// reference method.
// Defer to specialized implementation for 4D Mean across axes 1 & 2.
Expand All @@ -114,18 +147,89 @@ TfLiteStatus EvalMean(TfLiteContext* context, TfLiteNode* node) {
tflite::micro::GetTensorData<float>(output)));
}
} break;
case kTfLiteInt8: {
if (params->keep_dims) {
reference_integer_ops::Mean(
op_params, op_data->multiplier, op_data->shift,
tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<int8_t>(output), op_data->output_zp);
} else if (op_data->input_zp == op_data->output_zp &&
op_data->input_scale == op_data->output_scale) {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::Mean(
tflite::micro::GetTensorData<int8_t>(input), input->dims->data,
input->dims->size, tflite::micro::GetTensorData<int8_t>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis), num_axis,
params->keep_dims, temp_index, resolved_axis, temp_buffer));
} else {
int32_t* temp_buffer = static_cast<int32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::QuantizedMeanOrSum(
tflite::micro::GetTensorData<int8_t>(input), op_data->input_zp,
op_data->input_scale, input->dims->data, input->dims->size,
tflite::micro::GetTensorData<int8_t>(output),
op_data->output_zp, op_data->output_scale, output->dims->data,
output->dims->size, tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index, resolved_axis,
temp_buffer, false));
}
} break;
case kTfLiteUInt8: {
if (params->keep_dims) {
reference_ops::Mean(op_params, tflite::micro::GetTensorShape(input),
tflite::micro::GetTensorData<uint8_t>(input),
op_data->input_zp, op_data->input_scale,
tflite::micro::GetTensorShape(output),
tflite::micro::GetTensorData<uint8_t>(output),
op_data->output_zp, op_data->output_scale);
} else if (op_data->input_zp == op_data->output_zp &&
op_data->input_scale == op_data->output_scale) {
uint32_t* temp_buffer = static_cast<uint32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::Mean(tflite::micro::GetTensorData<uint8_t>(input),
input->dims->data, input->dims->size,
tflite::micro::GetTensorData<uint8_t>(output),
output->dims->data, output->dims->size,
tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index,
resolved_axis, temp_buffer));
} else {
uint32_t* temp_buffer = static_cast<uint32_t*>(
context->GetScratchBuffer(context, op_data->temp_buffer_idx));
TF_LITE_ENSURE(
context,
reference_ops::QuantizedMeanOrSum(
tflite::micro::GetTensorData<uint8_t>(input), op_data->input_zp,
op_data->input_scale, input->dims->data, input->dims->size,
tflite::micro::GetTensorData<uint8_t>(output),
op_data->output_zp, op_data->output_scale, output->dims->data,
output->dims->size, tflite::micro::GetTensorData<int>(axis),
num_axis, params->keep_dims, temp_index, resolved_axis,
temp_buffer, false));
}
} break;
default:
// TODO(b/144955155): Support uint8_t(b/144955155) and int8_t(b/144955018)
TF_LITE_ENSURE_MSG(context, false,
"Currently, only float32 input type "
"Currently, only float32, int8 or uint8 input type "
"is supported.");
}
return kTfLiteOk;
}
} // namespace reduce

TfLiteRegistration Register_MEAN() {
return {/*init=*/nullptr,
return {/*init=*/reduce::InitMean,
/*free=*/nullptr,
/*prepare=*/reduce::PrepareMeanOrSum,
/*invoke=*/reduce::EvalMean,
Expand Down