From ce29f31b04b2e9af732acf94e3e86ec4d83317dd Mon Sep 17 00:00:00 2001 From: Fredrik Knutsson Date: Thu, 20 Jun 2019 09:18:23 +0200 Subject: [PATCH] Call CMSIS-NN optimized kernel for depthwise_conv By using TAGS=cmsis-nn, the optimized depthwise conv is called, under the restriction that the kernel meets size requirements. Change-Id: I0a070b37ce7dcd06dd8c747de362b4fd42ed4e5a --- .../micro/kernels/cmsis-nn/depthwise_conv.cc | 64 +++++++++++++++---- .../micro/tools/make/ext_libs/cmsis.inc | 20 +++--- 2 files changed, 60 insertions(+), 24 deletions(-) diff --git a/tensorflow/lite/experimental/micro/kernels/cmsis-nn/depthwise_conv.cc b/tensorflow/lite/experimental/micro/kernels/cmsis-nn/depthwise_conv.cc index 8e3f4b9fe3fbab..a77ebd9fb7118e 100644 --- a/tensorflow/lite/experimental/micro/kernels/cmsis-nn/depthwise_conv.cc +++ b/tensorflow/lite/experimental/micro/kernels/cmsis-nn/depthwise_conv.cc @@ -22,6 +22,7 @@ limitations under the License. #include "tensorflow/lite/kernels/internal/tensor_ctypes.h" #include "tensorflow/lite/kernels/kernel_util.h" #include "tensorflow/lite/kernels/padding.h" +#include "arm_nnfunctions.h" namespace tflite { namespace ops { @@ -145,22 +146,58 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node, // Legacy ops used mixed left and right shifts. Now all are +ve-means-left. 
op_params.output_shift = -data->output_shift; - tflite::reference_ops::DepthwiseConv( - op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), - GetTensorShape(filter), GetTensorData<uint8_t>(filter), - GetTensorShape(bias), GetTensorData<int32_t>(bias), - GetTensorShape(output), GetTensorData<uint8_t>(output)); + +#if defined(ARM_MATH_DSP) + // optimizations utilize loop unrolling which requires the depth + // multiplier and the filter width to be even + RuntimeShape filter_shape = GetTensorShape(filter); + const int filter_height = filter_shape.Dims(1); + const int filter_width = filter_shape.Dims(2); + if (0 == op_params.depth_multiplier % 2 && 0 == filter_width % 2) { + RuntimeShape input_shape = GetTensorShape(input); + const int input_height = input_shape.Dims(1); + const int input_width = input_shape.Dims(2); + const int input_depth = input_shape.Dims(3); + RuntimeShape output_shape = GetTensorShape(output); + const int output_height = output_shape.Dims(1); + const int output_width = output_shape.Dims(2); + arm_depthwise_conv_u8_basic_ver1(GetTensorData<uint8_t>(input), + input_width, + input_height, + input_depth, + GetTensorData<uint8_t>(filter), + filter_width, + filter_height, + op_params.depth_multiplier, + op_params.padding_values.width, + op_params.padding_values.height, + op_params.stride_width, + op_params.stride_height, + op_params.dilation_width_factor, + op_params.dilation_height_factor, + GetTensorData<int32_t>(bias), + op_params.input_offset, + op_params.weights_offset, + op_params.output_offset, + GetTensorData<uint8_t>(output), + output_width, + output_height, + op_params.quantized_activation_min, + op_params.quantized_activation_max, + op_params.output_shift, + op_params.output_multiplier); + } else +#endif + { + tflite::reference_ops::DepthwiseConv( + op_params, GetTensorShape(input), GetTensorData<uint8_t>(input), + GetTensorShape(filter), GetTensorData<uint8_t>(filter), + GetTensorShape(bias), GetTensorData<int32_t>(bias), + GetTensorShape(output), GetTensorData<uint8_t>(output)); + } } TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* 
node) { -#ifdef ARM_CMSIS_NN_M3 - return kTfLiteError; -#elif ARM_CMSIS_NN_M4 - // Todo: call cmsis ops - return kTfLiteError; -#elif ARM_CMSIS_NN_M7 - return kTfLiteError; -#else auto* params = reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data); @@ -200,7 +237,6 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { return kTfLiteError; } return kTfLiteOk; -#endif } } // namespace depthwise_conv diff --git a/tensorflow/lite/experimental/micro/tools/make/ext_libs/cmsis.inc b/tensorflow/lite/experimental/micro/tools/make/ext_libs/cmsis.inc index 779b77f8be6233..c15828db5f040e 100644 --- a/tensorflow/lite/experimental/micro/tools/make/ext_libs/cmsis.inc +++ b/tensorflow/lite/experimental/micro/tools/make/ext_libs/cmsis.inc @@ -1,26 +1,26 @@ ifneq ($(filter cmsis-nn,$(ALL_TAGS)),) # Enable u-arch specfic behaviours ifneq (,$(filter $(TARGET_ARCH), cortex-m3)) - CCFLAGS += -DARM_MATH_CM3 - CXXFLAGS += -DARM_CMSIS_NN_M3 + # CMSIS-NN optimizations not supported endif ifneq (,$(filter $(TARGET_ARCH), cortex-m4)) - CCFLAGS += -DARM_MATH_CM4 - CXXFLAGS += -DARM_CMSIS_NN_M4 + CCFLAGS += -DARM_MATH_DSP + CXXFLAGS += -DARM_MATH_DSP endif ifneq (,$(filter $(TARGET_ARCH), cortex-m7)) - CCFLAGS += -DARM_MATH_CM7 - CXXFLAGS += -DARM_CMSIS_NN_M7 + CCFLAGS += -DARM_MATH_DSP + CXXFLAGS += -DARM_MATH_DSP endif ifneq (,$(filter $(TARGET_ARCH), x86_64)) - # For development purposes - CCFLAGS += -DARM_MATH_CM4 - CXXFLAGS += -DARM_CMSIS_NN_X86_64 + # CMSIS-NN optimizations not supported endif # Setup CMSIS-NN lib and add required header files to microlite lib INCLUDE CMSIS_PATH = $(MAKEFILE_DIR)/downloads/cmsis/ - MICROLITE_CC_SRCS += $(shell find $(CMSIS_PATH)/CMSIS/NN/Source/ -name *.c) + THIRD_PARTY_CC_SRCS += $(shell find $(CMSIS_PATH)/CMSIS/NN/Source/ -name *.c) + THIRD_PARTY_CC_HDRS += $(shell find $(CMSIS_PATH)/CMSIS/Core/Include/ -name *.h) \ + $(shell find $(CMSIS_PATH)/CMSIS/NN/Include/ -name *.h) \ + $(shell find $(CMSIS_PATH)/CMSIS/DSP/Include/ -name *.h) INCLUDES += 
-I$(CMSIS_PATH)/CMSIS/Core/Include \ -I$(CMSIS_PATH)/CMSIS/NN/Include \ -I$(CMSIS_PATH)/CMSIS/DSP/Include