Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cadence: HiFi4 Neural Network (NN) source download and Fix issue with… #37987

Merged
merged 1 commit into from
Mar 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
260 changes: 88 additions & 172 deletions tensorflow/lite/micro/kernels/xtensa_hifi/softmax.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
******************************************************************************/

/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
Expand All @@ -40,52 +40,54 @@ limitations under the License.
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/kernels/internal/common.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/softmax.h"
#include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "xtensa_tf_micro_common.h"

#include "xtensa_tf_micro_common.h"
namespace tflite {
namespace ops {
namespace micro {
namespace activations {
namespace {

// Per-invocation parameters for the quantized softmax paths. Every field is
// zero until CalculateSoftmaxOpData() fills it in.
struct OpData {
  // Multiplier written by tflite::PreprocessSoftmaxScaling().
  int32_t input_multiplier{0};
  // Left shift written by tflite::PreprocessSoftmaxScaling().
  int input_left_shift{0};
  // Not written by any code visible in this file; stays at its default.
  int32_t input_range_radius{0};
  // Negated result of tflite::CalculateInputRadius().
  int diff_min{0};
};

TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context,
TfLiteStatus CalculateSoftmaxParams(TfLiteContext* context,
const TfLiteTensor* input,
TfLiteTensor* output,
const TfLiteSoftmaxParams* params,
OpData* data) {
SoftmaxParams* op_data) {
if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
if (input->type == kTfLiteUInt8) {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteUInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteInt8);
if (output->type == kTfLiteInt16) {
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -32768);
// NOTE: Current int16 softmax output does not require symmetric scaling
// - so no need to verify scale here.
} else {
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteInt8);
TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
TF_LITE_ENSURE(context, output->params.scale == 1.f / 256);
}
}

static const int kScaledDiffIntegerBits = 5;

int input_left_shift;
tflite::PreprocessSoftmaxScaling(
static_cast<double>(params->beta),
static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
&data->input_multiplier, &data->input_left_shift);
data->diff_min = -1.0 * tflite::CalculateInputRadius(
kScaledDiffIntegerBits, data->input_left_shift);
&op_data->input_multiplier, &input_left_shift);
op_data->input_left_shift = input_left_shift;
op_data->diff_min =
-1.0 * tflite::CalculateInputRadius(kScaledDiffIntegerBits,
op_data->input_left_shift);
} else {
TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
TF_LITE_ENSURE_TYPES_EQ(context, output->type, kTfLiteFloat32);
op_data->beta = static_cast<double>(params->beta);
}
return kTfLiteOk;
}
Expand All @@ -99,219 +101,133 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
// Registration "free" hook (assigned to TfLiteRegistration::free below). The
// body is empty, so neither `context` nor `buffer` is touched.
void Free(TfLiteContext* context, void* buffer) {}

TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
return kTfLiteOk;
}
TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
const TfLiteTensor* input = GetInput(context, node, 0);
TF_LITE_ENSURE(context, NumDimensions(input) >= 1);

// Takes a 1D tensor and performs softmax along it.
void Softmax1DFloat(const TfLiteTensor* input, TfLiteTensor* output,
TfLiteSoftmaxParams* params) {
const int input_size = input->dims->data[0];
tflite::reference_ops::Softmax(input->data.f, input_size, 1, params->beta,
output->data.f);
return kTfLiteOk;
}

// Takes a 2D tensor and performs softmax along the last dimension.
TfLiteStatus Softmax2DFloat(TfLiteContext* context, const TfLiteTensor* input,
TfLiteTensor* output, TfLiteSoftmaxParams* params) {
const int batch_size = input->dims->data[0];
const int input_size = input->dims->data[1];
// Takes a tensor and performs softmax along the last dimension.
TfLiteStatus SoftmaxFloat(TfLiteContext *context, const TfLiteTensor* input, TfLiteTensor* output,
const SoftmaxParams& op_data) {
const RuntimeShape& input_shape = GetTensorShape(input);
const float *input_data = GetTensorData<float>(input);
const RuntimeShape& output_shape = GetTensorShape(output);
float *output_data = GetTensorData<float>(output);
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

ALLOCATE_XTENSA_NNLIB_SCRATCH_MEM;
float* p_scratch = (float*)xtensa_nnlib_scratch_buf;
float *p_scratch = (float *)xtensa_nnlib_scratch_buf;

if (input->dims->data[1] * sizeof(float) > XTENSA_NNLIB_MAX_SCRATCH_SIZE) {
if(depth * sizeof(float) > XTENSA_NNLIB_MAX_SCRATCH_SIZE)
{
TF_LITE_KERNEL_LOG(context, "Softmax: insufficient scratch memory");
return kTfLiteError;
}

for (int i = 0; i < batch_size * input_size; ++i) {
p_scratch[i] = input->data.f[i] * params->beta;
}
for (int i = 0; i < outer_size; ++i) {
for (int c = 0; c < depth; ++c) {
p_scratch[c] = input_data[i * depth + c] * static_cast<float>(op_data.beta);
}

for (int i = 0; i < batch_size; ++i) {
int err = xa_nn_vec_softmax_f32_f32(&output->data.f[i * input_size],
&p_scratch[i * input_size], input_size);
CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_softmax_f32_f32 failed");
int err = xa_nn_vec_softmax_f32_f32(&output_data[i * depth],
p_scratch,
depth);
CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_softmax_f32_f32 failed"); \
}
return kTfLiteOk;
}

void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
TfLiteSoftmaxParams* params, OpData* data) {
// (ahentz): this is arguably a dirty trick. Since the implementation
// always traverses the last dimension of a 4D tensor, we will pretend our 1D
// tensor is 4D in a special way. We will convert a (Y) shape into a (1,
// 1, 1, Y) shape.
const int input_size = input->dims->data[0];
const int32_t shape_data[4] = {1, 1, 1, input_size};
RuntimeShape shape(4, shape_data);
SoftmaxParams op_params;
op_params.input_multiplier = data->input_multiplier;
op_params.input_left_shift = data->input_left_shift;
op_params.diff_min = data->diff_min;
TfLiteStatus SoftmaxQuantized(TfLiteContext* context, const TfLiteTensor* input, TfLiteTensor* output,
const SoftmaxParams& op_data) {
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(op_params, shape,
GetTensorData<uint8_t>(input), shape,
GetTensorData<uint8_t>(output));
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_integer_ops::Softmax(
op_params, shape, GetTensorData<int8_t>(input), shape,
GetTensorData<int16_t>(output));
} else {
tflite::reference_integer_ops::Softmax(
op_params, shape, GetTensorData<int8_t>(input), shape,
GetTensorData<int8_t>(output));
}
}
}

TfLiteStatus Softmax2DQuantized(TfLiteContext* context,
const TfLiteTensor* input, TfLiteTensor* output,
TfLiteSoftmaxParams* params, OpData* data) {
// (ahentz): this is arguably a dirty trick. Since the implementation
// always traverses the last dimension of a 4D tensor, we will pretend our 2D
// tensor is 4D in a special way. We will convert a (X, Y) shape into a (X,
// 1, 1, Y) shape.
const int batch_size = input->dims->data[0];
const int input_size = input->dims->data[1];
const int32_t shape_data[4] = {batch_size, 1, 1, input_size};
RuntimeShape shape(4, shape_data);
SoftmaxParams op_params;
op_params.input_multiplier = data->input_multiplier;
op_params.input_left_shift = data->input_left_shift;
op_params.diff_min = data->diff_min;
const RuntimeShape& input_shape = GetTensorShape(input);
const uint8_t *input_data = GetTensorData<uint8_t>(input);
const RuntimeShape& output_shape = GetTensorShape(output);
uint8_t *output_data = GetTensorData<uint8_t>(output);
const int trailing_dim = input_shape.DimensionsCount() - 1;
const int outer_size =
MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
const int depth =
MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);

if (input->type == kTfLiteUInt8) {
ALLOCATE_XTENSA_NNLIB_SCRATCH_MEM;
void* p_scratch = (void*)xtensa_nnlib_scratch_buf;
void *p_scratch = (void *)xtensa_nnlib_scratch_buf;

if (get_softmax_scratch_size(PREC_ASYM8, PREC_ASYM8, input_size) >
XTENSA_NNLIB_MAX_SCRATCH_SIZE) {
if(get_softmax_scratch_size(PREC_ASYM8, PREC_ASYM8, depth) > XTENSA_NNLIB_MAX_SCRATCH_SIZE)
{
TF_LITE_KERNEL_LOG(context, "Softmax: insufficient scratch memory");
return kTfLiteError;
}

for (int i = 0; i < batch_size; ++i) {
int err = xa_nn_vec_softmax_asym8_asym8(
&output->data.uint8[i * input_size],
&input->data.uint8[i * input_size], op_params.diff_min,
op_params.input_left_shift, op_params.input_multiplier, input_size,
p_scratch);
CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_softmax_asym8_asym8 failed");
for (int i = 0; i < outer_size; ++i) {
int err = xa_nn_vec_softmax_asym8_asym8(&output_data[i * depth],
&input_data[i * depth],
op_data.diff_min,
op_data.input_left_shift,
op_data.input_multiplier,
depth,
p_scratch
);
CHECK_ERR_HIFI_NNLIB_KER(err, "xa_nn_vec_softmax_asym8_asym8 failed"); \
}
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_integer_ops::Softmax(
op_params, shape, GetTensorData<int8_t>(input), shape,
GetTensorData<int16_t>(output));
} else {
tflite::reference_integer_ops::Softmax(
op_params, shape, GetTensorData<int8_t>(input), shape,
GetTensorData<int8_t>(output));
}
}
return kTfLiteOk;
}

// Takes a 4D tensor and perform softmax along the forth dimension.
void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
TfLiteSoftmaxParams* params) {
SoftmaxParams op_params;
op_params.beta = static_cast<double>(params->beta);
tflite::reference_ops::Softmax(
op_params, GetTensorShape(input), GetTensorData<float>(input),
GetTensorShape(output), GetTensorData<float>(output));
}

void Softmax4DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
TfLiteSoftmaxParams* params, OpData* data) {
SoftmaxParams op_params;
op_params.input_multiplier = data->input_multiplier;
op_params.input_left_shift = data->input_left_shift;
op_params.diff_min = data->diff_min;
if (input->type == kTfLiteUInt8) {
tflite::reference_ops::Softmax(
op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
GetTensorShape(output), GetTensorData<uint8_t>(output));
} else {
if (output->type == kTfLiteInt16) {
tflite::reference_integer_ops::Softmax(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int16_t>(output));
} else {
tflite::reference_integer_ops::Softmax(
op_params, GetTensorShape(input), GetTensorData<int8_t>(input),
tflite::reference_ops::Softmax(
op_data, GetTensorShape(input), GetTensorData<int8_t>(input),
GetTensorShape(output), GetTensorData<int8_t>(output));
}
}
return kTfLiteOk;
}

TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
auto* params = reinterpret_cast<TfLiteSoftmaxParams*>(node->builtin_data);
auto* params = static_cast<TfLiteSoftmaxParams*>(node->builtin_data);

const TfLiteTensor* input = GetInput(context, node, 0);
TfLiteTensor* output = GetOutput(context, node, 0);

OpData local_data_object;
OpData* data = &local_data_object;
SoftmaxParams op_data;
TF_LITE_ENSURE_STATUS(
CalculateSoftmaxOpData(context, input, output, params, data));
CalculateSoftmaxParams(context, input, output, params, &op_data));

// (ahentz): consider an implementation that works for many (all?)
// dimensions.
switch (input->type) {
case kTfLiteFloat32: {
if (NumDimensions(input) == 1) {
Softmax1DFloat(input, output, params);
return kTfLiteOk;
}
if (NumDimensions(input) == 2) {
return Softmax2DFloat(context, input, output, params);
}
if (NumDimensions(input) == 4) {
Softmax4DFloat(input, output, params);
return kTfLiteOk;
}
TF_LITE_KERNEL_LOG(
context, "Only 1D, 2D and 4D tensors supported currently, got %dD.",
NumDimensions(input));
return kTfLiteError;
return SoftmaxFloat(context, input, output, op_data);
}
case kTfLiteInt8:
case kTfLiteUInt8: {
if (NumDimensions(input) == 1) {
Softmax1DQuantized(input, output, params, data);
return kTfLiteOk;
}
if (NumDimensions(input) == 2) {
return Softmax2DQuantized(context, input, output, params, data);
}
if (NumDimensions(input) == 4) {
Softmax4DQuantized(input, output, params, data);
return kTfLiteOk;
}
TF_LITE_KERNEL_LOG(context,
"Only 2D and 4D tensors supported currently, got %dD.",
NumDimensions(input));
return kTfLiteError;
return SoftmaxQuantized(context, input, output, op_data);
}
default:
TF_LITE_KERNEL_LOG(
context,
"Only float32, uint8_t and int8_t supported currently, got %d.",
"Only float32, uint8_t and int8_t input supported currently, got %d.",
input->type);
return kTfLiteError;
}
}
} // namespace activations

TfLiteRegistration* Register_SOFTMAX() {
static TfLiteRegistration r = {};
r.init = activations::Init;
r.free = activations::Free;
r.prepare = activations::SoftmaxPrepare;
r.invoke = activations::SoftmaxEval;
static TfLiteRegistration r = {activations::Init,
activations::Free,
activations::SoftmaxPrepare,
activations::SoftmaxEval,
nullptr,
0,
nullptr,
0};
return &r;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ifneq ($(filter xtensa_hifi, $(ALL_TAGS)),)

XTENSA_PATH = $(MAKEFILE_DIR)/../../kernels/xtensa_hifi
XTENSA_PATH = $(MAKEFILE_DIR)/downloads

ifneq (,$(filter hifi4%, $(TARGET_ARCH)))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
ifeq ($(TARGET), xtensa_hifi)
TARGET_ARCH := hifi3_bd5

$(eval $(call add_third_party_download,$(XTENSA_HIFI4_URL),$(XTENSA_HIFI4_MD5),xa_nnlib,))

PLATFORM_ARGS = \
-mno-mul16 \
-mno-mul32 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
ifeq ($(TARGET), xtensa-xpg)
TARGET_ARCH := xtensa-xpg

$(eval $(call add_third_party_download,$(XTENSA_HIFI4_URL),$(XTENSA_HIFI4_MD5),xa_nnlib,))

PLATFORM_ARGS = \
-DTF_LITE_MCU_DEBUG_LOG \
--xtensa-core=$(XTENSA_CORE) \
Expand Down