Call CMSIS-NN optimized kernel for depthwise_conv #30008

Merged 1 commit on Jun 28, 2019
@@ -22,6 +22,7 @@ limitations under the License.
 #include "tensorflow/lite/kernels/internal/tensor_ctypes.h"
 #include "tensorflow/lite/kernels/kernel_util.h"
 #include "tensorflow/lite/kernels/padding.h"
+#include "arm_nnfunctions.h"
 
 namespace tflite {
 namespace ops {
@@ -145,22 +146,58 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
   // Legacy ops used mixed left and right shifts. Now all are +ve-means-left.
   op_params.output_shift = -data->output_shift;
 
-  tflite::reference_ops::DepthwiseConv(
-      op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
-      GetTensorShape(filter), GetTensorData<uint8_t>(filter),
-      GetTensorShape(bias), GetTensorData<int32_t>(bias),
-      GetTensorShape(output), GetTensorData<uint8_t>(output));
+#if defined(ARM_MATH_DSP)
+  // The optimized kernel utilizes loop unrolling, which requires the depth
+  // multiplier and the filter width to be even.
+  RuntimeShape filter_shape = GetTensorShape(filter);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  if (0 == op_params.depth_multiplier % 2 && 0 == filter_width % 2) {
+    RuntimeShape input_shape = GetTensorShape(input);
+    const int input_height = input_shape.Dims(1);
+    const int input_width = input_shape.Dims(2);
+    const int input_depth = input_shape.Dims(3);
+    RuntimeShape output_shape = GetTensorShape(output);
+    const int output_height = output_shape.Dims(1);
+    const int output_width = output_shape.Dims(2);
+    arm_depthwise_conv_u8_basic_ver1(GetTensorData<uint8_t>(input),
+                                     input_width,
+                                     input_height,
+                                     input_depth,
+                                     GetTensorData<uint8_t>(filter),
+                                     filter_width,
+                                     filter_height,
+                                     op_params.depth_multiplier,
+                                     op_params.padding_values.width,
+                                     op_params.padding_values.height,
+                                     op_params.stride_width,
+                                     op_params.stride_height,
+                                     op_params.dilation_width_factor,
+                                     op_params.dilation_height_factor,
+                                     GetTensorData<int32_t>(bias),
+                                     op_params.input_offset,
+                                     op_params.weights_offset,
+                                     op_params.output_offset,
+                                     GetTensorData<uint8_t>(output),
+                                     output_width,
+                                     output_height,
+                                     op_params.quantized_activation_min,
+                                     op_params.quantized_activation_max,
+                                     op_params.output_shift,
+                                     op_params.output_multiplier);
+  } else
+#endif
+  {
+    tflite::reference_ops::DepthwiseConv(
+        op_params, GetTensorShape(input), GetTensorData<uint8_t>(input),
+        GetTensorShape(filter), GetTensorData<uint8_t>(filter),
+        GetTensorShape(bias), GetTensorData<int32_t>(bias),
+        GetTensorShape(output), GetTensorData<uint8_t>(output));
+  }
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-#ifdef ARM_CMSIS_NN_M3
-  return kTfLiteError;
-#elif ARM_CMSIS_NN_M4
-  // Todo: call cmsis ops
-  return kTfLiteError;
-#elif ARM_CMSIS_NN_M7
-  return kTfLiteError;
-#else
   auto* params =
       reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
 
@@ -200,7 +237,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     return kTfLiteError;
   }
   return kTfLiteOk;
-#endif
 }
 
 }  // namespace depthwise_conv
tensorflow/lite/experimental/micro/tools/make/ext_libs/cmsis.inc (20 changes: 10 additions & 10 deletions)
@@ -1,26 +1,26 @@
 ifneq ($(filter cmsis-nn,$(ALL_TAGS)),)
     # Enable u-arch specific behaviours
     ifneq (,$(filter $(TARGET_ARCH), cortex-m3))
-        CCFLAGS += -DARM_MATH_CM3
-        CXXFLAGS += -DARM_CMSIS_NN_M3
+        # CMSIS-NN optimizations not supported
     endif
     ifneq (,$(filter $(TARGET_ARCH), cortex-m4))
-        CCFLAGS += -DARM_MATH_CM4
-        CXXFLAGS += -DARM_CMSIS_NN_M4
+        CCFLAGS += -DARM_MATH_DSP
+        CXXFLAGS += -DARM_MATH_DSP
     endif
     ifneq (,$(filter $(TARGET_ARCH), cortex-m7))
-        CCFLAGS += -DARM_MATH_CM7
-        CXXFLAGS += -DARM_CMSIS_NN_M7
+        CCFLAGS += -DARM_MATH_DSP
+        CXXFLAGS += -DARM_MATH_DSP
     endif
     ifneq (,$(filter $(TARGET_ARCH), x86_64))
         # For development purposes
-        CCFLAGS += -DARM_MATH_CM4
-        CXXFLAGS += -DARM_CMSIS_NN_X86_64
+        # CMSIS-NN optimizations not supported
     endif
 
     # Setup CMSIS-NN lib and add required header files to microlite lib INCLUDE
     CMSIS_PATH = $(MAKEFILE_DIR)/downloads/cmsis/
-    MICROLITE_CC_SRCS += $(shell find $(CMSIS_PATH)/CMSIS/NN/Source/ -name *.c)
+    THIRD_PARTY_CC_SRCS += $(shell find $(CMSIS_PATH)/CMSIS/NN/Source/ -name *.c)
+    THIRD_PARTY_CC_HDRS += $(shell find $(CMSIS_PATH)/CMSIS/Core/Include/ -name *.h) \
+                           $(shell find $(CMSIS_PATH)/CMSIS/NN/Include/ -name *.h) \
+                           $(shell find $(CMSIS_PATH)/CMSIS/DSP/Include/ -name *.h)
     INCLUDES += -I$(CMSIS_PATH)/CMSIS/Core/Include \
                 -I$(CMSIS_PATH)/CMSIS/NN/Include \
                 -I$(CMSIS_PATH)/CMSIS/DSP/Include