-
Notifications
You must be signed in to change notification settings - Fork 74k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[TFLM] Added optimized softmax kernel (int32 and float) for CEVA-BX1 and CEVA SP500 #47783
Merged
copybara-service
merged 5 commits into
tensorflow:master
from
yair-ehrenwald:add_softmax_optimized
Mar 19, 2021
Merged
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
775b186
added optimized softmax
yair-ehrenwald 27d0a2c
fixed include path
yair-ehrenwald 374998f
Merge remote-tracking branch 'upstream/master' into add_softmax_optim…
yair-ehrenwald 289ab7f
Removed uint8 and moved scratch to header file
yair-ehrenwald 00a0363
Added license
yair-ehrenwald File filter
Filter by extension
Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_MICRO_KERNELS_CEVA_CEVA_COMMON_H_
#define TENSORFLOW_LITE_MICRO_KERNELS_CEVA_CEVA_COMMON_H_

// Was missing: the extern declarations below use int32_t, so this header
// must pull in the fixed-width integer types itself rather than rely on
// transitive includes.
#include <cstdint>

#if defined(CEVA_BX1) || defined(CEVA_SP500)
// Scratch working buffer shared by the CEVA-optimized TFLM kernels, and its
// capacity in int32_t elements. Defined (and sized) by the CEVA platform
// glue; kernels must check the size before use.
extern int32_t* CEVA_TFLM_KERNELS_SCRATCH;
extern int32_t CEVA_TFLM_KERNELS_SCRATCH_SIZE_VAL;
#endif

#endif  // TENSORFLOW_LITE_MICRO_KERNELS_CEVA_CEVA_COMMON_H_
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,172 @@ | ||
/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. | ||
|
||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
|
||
http://www.apache.org/licenses/LICENSE-2.0 | ||
|
||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
==============================================================================*/ | ||
|
||
#include "tensorflow/lite/kernels/internal/reference/softmax.h" | ||
|
||
#include "tensorflow/lite/c/builtin_op_data.h" | ||
#include "tensorflow/lite/c/common.h" | ||
#include "tensorflow/lite/kernels/internal/common.h" | ||
#include "tensorflow/lite/kernels/internal/quantization_util.h" | ||
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h" | ||
#include "tensorflow/lite/kernels/kernel_util.h" | ||
#include "tensorflow/lite/kernels/op_macros.h" | ||
#include "tensorflow/lite/micro/kernels/ceva/ceva_common.h" | ||
#include "tensorflow/lite/micro/kernels/ceva/ceva_tflm_lib.h" | ||
#include "tensorflow/lite/micro/kernels/kernel_util.h" | ||
#include "tensorflow/lite/micro/kernels/softmax.h" | ||
#ifdef MCPS_MEASUREMENT | ||
#include "tensorflow/lite/micro/kernels/ceva/mcps_macros.h" | ||
#endif | ||
|
||
namespace tflite { | ||
namespace { | ||
|
||
// Takes a tensor and performs softmax along the last dimension. | ||
void SoftmaxFloatCEVA(const TfLiteEvalTensor* input, TfLiteEvalTensor* output, | ||
const SoftmaxParams& op_data) { | ||
const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input); | ||
const float* input_data = tflite::micro::GetTensorData<float>(input); | ||
const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output); | ||
float* output_data = tflite::micro::GetTensorData<float>(output); | ||
|
||
const float beta = static_cast<float>(op_data.beta); | ||
const int trailing_dim = input_shape.DimensionsCount() - 1; | ||
const int outer_size = | ||
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape); | ||
const int depth = | ||
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim); | ||
int outer_size_mcps = outer_size; | ||
int depth_mcps = depth; | ||
|
||
#ifdef MCPS_MEASUREMENT | ||
MCPS_START_ONE; | ||
#endif | ||
for (int i = 0; i < outer_size; ++i) { | ||
CEVA_TFLM_Softmax_Float32(&input_data[i * depth], &output_data[i * depth], | ||
beta, depth); | ||
} | ||
#ifdef MCPS_MEASUREMENT | ||
MCPS_STOP_ONE( | ||
"Test params:Call CEVA_TFLM_Softmax_Float32 %d times, inetrnal loop = %d", | ||
outer_size_mcps, depth_mcps); | ||
#endif | ||
} | ||
|
||
// Quantized softmax dispatch:
//   - int8 input, int16 output: portable reference kernel (no CEVA variant).
//   - int8 input, int8 output:  CEVA-optimized kernel, using the shared
//     scratch buffer declared in ceva_common.h.
//   - otherwise (int16 path):   portable reference int16 kernel.
// Returns kTfLiteError (with a log message) when the shared scratch buffer
// is smaller than the row length required by the CEVA kernel.
TfLiteStatus SoftmaxQuantizedCEVA(TfLiteContext* context,
                                  const TfLiteEvalTensor* input,
                                  TfLiteEvalTensor* output,
                                  const SoftmaxParams& op_data) {
  if (input->type == kTfLiteInt8) {
    if (output->type == kTfLiteInt16) {
      tflite::reference_ops::Softmax(
          op_data, tflite::micro::GetTensorShape(input),
          tflite::micro::GetTensorData<int8_t>(input),
          tflite::micro::GetTensorShape(output),
          tflite::micro::GetTensorData<int16_t>(output));
    } else {
      const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
      const int8_t* input_data = tflite::micro::GetTensorData<int8_t>(input);

      const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
      int8_t* output_data = tflite::micro::GetTensorData<int8_t>(output);

      const int32_t input_beta_multiplier =
          static_cast<int32_t>(op_data.input_multiplier);
      const int32_t input_beta_left_shift =
          static_cast<int32_t>(op_data.input_left_shift);
      // Flatten all leading dims: softmax runs independently over each of
      // `outer_size` rows of length `depth`.
      const int trailing_dim = input_shape.DimensionsCount() - 1;
      const int outer_size =
          MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
      const int depth =
          MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

      // The CEVA kernel needs `depth` int32 scratch entries per call.
      // (Message grammar fixed: "less that" -> "less than".)
      if (depth > CEVA_TFLM_KERNELS_SCRATCH_SIZE_VAL) {
        TF_LITE_KERNEL_LOG(context,
                           "Scratch size (%d) less than required (%d)",
                           CEVA_TFLM_KERNELS_SCRATCH_SIZE_VAL, depth);
        return kTfLiteError;
      }

#ifdef MCPS_MEASUREMENT
      // Snapshots used only by the cycle-count report; scoped under the
      // ifdef so normal builds see no unused variables.
      const int outer_size_mcps = outer_size;
      const int depth_mcps = depth;
      MCPS_START_ONE;
#endif
      for (int i = 0; i < outer_size; ++i) {
        CEVA_TFLM_Softmax_Int8(&input_data[i * depth], &output_data[i * depth],
                               input_beta_multiplier, input_beta_left_shift,
                               depth, CEVA_TFLM_KERNELS_SCRATCH);
      }
#ifdef MCPS_MEASUREMENT
      // Fixed typo: "inetrnal" -> "internal".
      MCPS_STOP_ONE(
          "Test params:Call CEVA_TFLM_Softmax_Int8 %d times, internal loop = "
          "%d",
          outer_size_mcps, depth_mcps);
#endif
    }
  } else {
    tflite::reference_ops::SoftmaxInt16(
        op_data, tflite::micro::GetTensorShape(input),
        tflite::micro::GetTensorData<int16_t>(input),
        tflite::micro::GetTensorShape(output),
        tflite::micro::GetTensorData<int16_t>(output));
  }

  return kTfLiteOk;
}
|
||
// Top-level softmax evaluation for CEVA targets: routes the single input
// tensor to the float or quantized implementation based on its type.
// `node->user_data` must hold the SoftmaxParams computed during Prepare.
TfLiteStatus SoftmaxEvalCEVA(TfLiteContext* context, TfLiteNode* node) {
  const TfLiteEvalTensor* input = tflite::micro::GetEvalInput(context, node, 0);
  TfLiteEvalTensor* output = tflite::micro::GetEvalOutput(context, node, 0);

  TFLITE_DCHECK(node->user_data != nullptr);
  const SoftmaxParams op_data =
      *static_cast<SoftmaxParams*>(node->user_data);

  // Guard-clause dispatch instead of a switch: handle each supported type
  // and fall through to the error path for everything else.
  if (input->type == kTfLiteFloat32) {
    SoftmaxFloatCEVA(input, output, op_data);
    return kTfLiteOk;
  }
  if (input->type == kTfLiteInt8 || input->type == kTfLiteInt16) {
    return SoftmaxQuantizedCEVA(context, input, output, op_data);
  }

  TF_LITE_KERNEL_LOG(context, "Type %s (%d) not supported.",
                     TfLiteTypeGetName(input->type), input->type);
  return kTfLiteError;
}
|
||
// Invoke entry point: compiled-in dispatch. On CEVA-BX1/SP500 builds this
// calls the CEVA-optimized path above; on all other targets it falls back to
// the portable reference SoftmaxEval (declared in an included header).
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
#if defined(CEVA_BX1) || defined(CEVA_SP500)
  return SoftmaxEvalCEVA(context, node);
#else
  return SoftmaxEval(context, node);  // reference fallback
#endif
}
} // namespace | ||
|
||
// Returns the kernel registration record for the SOFTMAX op. SoftmaxInit and
// SoftmaxPrepare come from an included shared softmax header; invoke is the
// CEVA-aware Eval defined above. builtin_code/custom_name/version are left at
// their zero defaults, matching the other entries.
TfLiteRegistration Register_SOFTMAX() {
  return {/*init=*/SoftmaxInit,
          /*free=*/nullptr,
          /*prepare=*/SoftmaxPrepare,
          /*invoke=*/Eval,
          /*profiling_string=*/nullptr,
          /*builtin_code=*/0,
          /*custom_name=*/nullptr,
          /*version=*/0};
}
|
||
} // namespace tflite |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you could add these to ceva_common.h as well?
No change needed in this PR. If you think it is worthwhile then you can make the change in all the kernels in a follow-up PR.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd like to have our measurement macros in all kernels, but only some of them require scratch memory, so I'll leave it as is for the time being. Eventually I think we will do some work on the whole scratch usage issue, exact size calculations per kernel etc and hopefully come up with something maintainable.