From aff6f7b83d0ffd2c081bab04915ce4fd01868838 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Mon, 3 May 2021 08:41:57 +0200 Subject: [PATCH 1/3] TFLM: Enable FVP target in benchmarks * Enable FVP target with and without Ethos-U for person detect benchmarks. * Use uint32 instead of int32 in TicksToMs to avoid overflow. * Add GetCurrentTimeTicks for FVP target. * Use Vela converted model in person detect benchmarks for Ethos-U. Change-Id: Iae7f60fea22c3a652dc39542966b89fd5d541f1e --- tensorflow/lite/micro/benchmarks/Makefile.inc | 10 +++- tensorflow/lite/micro/benchmarks/README.md | 25 ++++++++++ .../benchmarks/person_detection_benchmark.cc | 4 +- .../micro/cortex_m_corstone_300/micro_time.cc | 21 ++++++++ .../cortex_m_corstone_300/system_setup.cc | 50 +++++++++++++++++++ .../lite/micro/kernels/ethos_u/README.md | 2 + tensorflow/lite/micro/micro_time.h | 6 +-- .../testing/test_with_arm_corstone_300.sh | 15 +++--- .../make/ethos_u_core_platform_download.sh | 3 ++ .../micro/tools/make/ext_libs/ethos_u.inc | 7 ++- .../cortex_m_corstone_300_makefile.inc | 3 -- 11 files changed, 130 insertions(+), 16 deletions(-) create mode 100644 tensorflow/lite/micro/cortex_m_corstone_300/micro_time.cc diff --git a/tensorflow/lite/micro/benchmarks/Makefile.inc b/tensorflow/lite/micro/benchmarks/Makefile.inc index 2106ae3bfedaeb..cf16affa58ecb9 100644 --- a/tensorflow/lite/micro/benchmarks/Makefile.inc +++ b/tensorflow/lite/micro/benchmarks/Makefile.inc @@ -9,9 +9,17 @@ tensorflow/lite/micro/benchmarks/micro_benchmark.h PERSON_DETECTION_BENCHMARK_SRCS := \ tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc \ $(MAKEFILE_DIR)/downloads/person_model_int8/no_person_image_data.cc \ -$(MAKEFILE_DIR)/downloads/person_model_int8/person_detect_model_data.cc \ $(MAKEFILE_DIR)/downloads/person_model_int8/person_image_data.cc +ifeq ($(CO_PROCESSOR),ethos_u) + # Ethos-U use a Vela optimized version of the original model. 
+ PERSON_DETECTION_BENCHMARK_SRCS += \ + $(MAKEFILE_DIR)/downloads/person_model_int8/person_detect_model_data_vela.cc +else + PERSON_DETECTION_BENCHMARK_SRCS += \ + $(MAKEFILE_DIR)/downloads/person_model_int8/person_detect_model_data.cc +endif + PERSON_DETECTION_BENCHMARK_HDRS := \ tensorflow/lite/micro/examples/person_detection/person_detect_model_data.h \ tensorflow/lite/micro/examples/person_detection/no_person_image_data.h \ diff --git a/tensorflow/lite/micro/benchmarks/README.md b/tensorflow/lite/micro/benchmarks/README.md index 74de7599c97e6f..cdd7b6685ac784 100644 --- a/tensorflow/lite/micro/benchmarks/README.md +++ b/tensorflow/lite/micro/benchmarks/README.md @@ -64,3 +64,28 @@ make -f tensorflow/lite/micro/tools/make/Makefile TARGET=sparkfun_edge person_de Refer to flashing instructions in the [Person Detection Example](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/person_detection/README.md#running-on-sparkfun-edge). + +## Run on FVP based on Arm Corstone-300 software. +For more info about the Corstone-300 software see: tensorflow/lite/micro/cortex_m_corstone_300/README.md. +Disclaimer: Executing the benchmark test on the Corstone-300 software will provide a general metric of instructions executed. The estimates are not cycle accurate, however it aligns to instruction per cycle, and is a consistent environment. This means it can detect if code changes changed performance. + +The person detection benchmark can also run with Ethos-U enabled, as the downloaded model will be optimized for Ethos-U. +For more info see: tensorflow/lite/micro/kernels/ethos_u/README.md. 
+ +To run the keyword benchmark on FVP, run + +``` +make -j -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_keyword_benchmark +``` + +To run the person detection benchmark on FVP, run + +``` +make -j -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_person_detection_benchmark +``` + +To run the person detection benchmark on FVP with Ethos-U, run + +``` +make -j -f tensorflow/lite/micro/tools/make/Makefile CO_PROCESSOR=ethos_u TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_person_detection_benchmark +``` \ No newline at end of file diff --git a/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc b/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc index 1e98bbd53a99d2..388178343a6e3f 100644 --- a/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc +++ b/tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc @@ -63,13 +63,13 @@ void PersonDetectionNIerations(const int8_t* input, int iterations, PersonDetectionBenchmarkRunner& benchmark_runner, MicroProfiler& profiler) { benchmark_runner.SetInput(input); - int32_t ticks = 0; + uint32_t ticks = 0; for (int i = 0; i < iterations; ++i) { profiler.ClearEvents(); benchmark_runner.RunSingleIteration(); ticks += profiler.GetTotalTicks(); } - MicroPrintf("%s took %d ticks (%d ms)", tag, ticks, TicksToMs(ticks)); + MicroPrintf("%s took %u ticks (%u ms)", tag, ticks, TicksToMs(ticks)); } } // namespace tflite diff --git a/tensorflow/lite/micro/cortex_m_corstone_300/micro_time.cc b/tensorflow/lite/micro/cortex_m_corstone_300/micro_time.cc new file mode 100644 index 00000000000000..a7db6e482acbc9 --- /dev/null +++ b/tensorflow/lite/micro/cortex_m_corstone_300/micro_time.cc @@ -0,0 +1,21 @@ +/* Copyright 2021 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +// This file is empty to ensure that a specialized implementation of +// micro_time.h is used (instead of the default implementation from +// tensorflow/lite/micro/micro_time.cc). +// +// The actual target-specific implementation of micro_time.h is in +// system_setup.cc since that allows us to consolidate all the target-specific +// specializations into one source file. diff --git a/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc b/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc index 4b716e90ef7ac7..ab4b2ef02a6048 100644 --- a/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc +++ b/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc @@ -21,10 +21,60 @@ limitations under the License. #include CMSIS_DEVICE_ARM_CORTEX_M_XX_HEADER_FILE #endif #include "tensorflow/lite/micro/micro_error_reporter.h" +#include "tensorflow/lite/micro/micro_time.h" #include "tensorflow/lite/micro/system_setup.h" +// DWT (Data Watchpoint and Trace) registers, only exists on ARM Cortex with a +// DWT unit. +#define KIN1_DWT_CONTROL (*((volatile uint32_t*)0xE0001000)) + +// DWT Control register. +#define KIN1_DWT_CYCCNTENA_BIT (1UL << 0) + +// CYCCNTENA bit in DWT_CONTROL register. +#define KIN1_DWT_CYCCNT (*((volatile uint32_t*)0xE0001004)) + +// DWT Cycle Counter register. 
+#define KIN1_DEMCR (*((volatile uint32_t*)0xE000EDFC)) + +// DEMCR: Debug Exception and Monitor Control Register. +#define KIN1_TRCENA_BIT (1UL << 24) + +// Trace enable bit in DEMCR register. +#define KIN1_LAR (*((volatile uint32_t*)0xE0001FB0)) + +// Unlock access to DWT (ITM, etc.)registers. +#define KIN1_UnlockAccessToDWT() KIN1_LAR = 0xC5ACCE55; + +// TRCENA: Enable trace and debug block DEMCR (Debug Exception and Monitor +// Control Register. +#define KIN1_InitCycleCounter() KIN1_DEMCR |= KIN1_TRCENA_BIT + +#define KIN1_ResetCycleCounter() KIN1_DWT_CYCCNT = 0 +#define KIN1_EnableCycleCounter() KIN1_DWT_CONTROL |= KIN1_DWT_CYCCNTENA_BIT +#define KIN1_DisableCycleCounter() KIN1_DWT_CONTROL &= ~KIN1_DWT_CYCCNTENA_BIT +#define KIN1_GetCycleCounter() KIN1_DWT_CYCCNT + namespace tflite { +namespace { +constexpr int kClocksPerSecond = 25e6; +} // namespace + +int32_t ticks_per_second() { return kClocksPerSecond; } + +int32_t GetCurrentTimeTicks() { + static bool is_initialized = false; + if (!is_initialized) { + KIN1_UnlockAccessToDWT(); + KIN1_InitCycleCounter(); + KIN1_ResetCycleCounter(); + KIN1_EnableCycleCounter(); + is_initialized = true; + } + return KIN1_GetCycleCounter(); +} + #ifdef ETHOS_U void ethosuIrqHandler0() { ethosu_irq_handler(); } #endif diff --git a/tensorflow/lite/micro/kernels/ethos_u/README.md b/tensorflow/lite/micro/kernels/ethos_u/README.md index 11c69df4a9d851..b29a8c0dcc50b3 100644 --- a/tensorflow/lite/micro/kernels/ethos_u/README.md +++ b/tensorflow/lite/micro/kernels/ethos_u/README.md @@ -52,6 +52,8 @@ In order to run a test with Ethos-U55 enabled, a platform with corresponding har On top of that the .tflite model needs to be modified according subchapter "Ethos-U custom operator" above. +The log level of the Ethos-U driver can be set in the build command. For example: ETHOSU_LOG_SEVERITY=ETHOSU_LOG_INFO. + ## Example using network tester See tensorflow/lite/micro/examples/network_tester/README.md for more info. 
diff --git a/tensorflow/lite/micro/micro_time.h b/tensorflow/lite/micro/micro_time.h index fac9069b1a7bdc..79d10078d3499c 100644 --- a/tensorflow/lite/micro/micro_time.h +++ b/tensorflow/lite/micro/micro_time.h @@ -26,9 +26,9 @@ int32_t ticks_per_second(); // Return time in ticks. The meaning of a tick varies per platform. int32_t GetCurrentTimeTicks(); -inline int32_t TicksToMs(int32_t ticks) { - return static_cast(1000.0f * static_cast(ticks) / - static_cast(ticks_per_second())); +inline uint32_t TicksToMs(uint32_t ticks) { + return static_cast(1000.0f * static_cast(ticks) / + static_cast(ticks_per_second())); } } // namespace tflite diff --git a/tensorflow/lite/micro/testing/test_with_arm_corstone_300.sh b/tensorflow/lite/micro/testing/test_with_arm_corstone_300.sh index b51f49caeb488a..9b39ee4adf5c65 100755 --- a/tensorflow/lite/micro/testing/test_with_arm_corstone_300.sh +++ b/tensorflow/lite/micro/testing/test_with_arm_corstone_300.sh @@ -39,11 +39,14 @@ FVP+='-C mps3_board.uart0.unbuffered_output=1 ' FVP+='-C mps3_board.uart0.shutdown_on_eot=1' ${FVP} ${BINARY_TO_TEST} | tee ${MICRO_LOG_FILENAME} -if grep -q "$PASS_STRING" ${MICRO_LOG_FILENAME} +if [[ ${2} != "non_test_binary" ]] then - echo "$BINARY_TO_TEST: PASS" - exit 0 -else - echo "$BINARY_TO_TEST: FAIL - '$PASS_STRING' not found in logs." - exit 1 + if grep -q "$PASS_STRING" ${MICRO_LOG_FILENAME} + then + echo "$BINARY_TO_TEST: PASS" + exit 0 + else + echo "$BINARY_TO_TEST: FAIL - '$PASS_STRING' not found in logs." 
+ exit 1 + fi fi diff --git a/tensorflow/lite/micro/tools/make/ethos_u_core_platform_download.sh b/tensorflow/lite/micro/tools/make/ethos_u_core_platform_download.sh index 5c02a39be20916..e5e4ed62db1f9c 100755 --- a/tensorflow/lite/micro/tools/make/ethos_u_core_platform_download.sh +++ b/tensorflow/lite/micro/tools/make/ethos_u_core_platform_download.sh @@ -80,6 +80,9 @@ else sed -i '/rodata/d' ${LINKER_PATH}/platform_parsed.ld sed -i 's/network_model_sec/\.rodata\*/' ${LINKER_PATH}/platform_parsed.ld + # Allow tensor_arena in namespace. This will put tensor arena in SRAM intended by linker file. + sed -i 's/tensor_arena/\*tensor_arena\*/' ${LINKER_PATH}/platform_parsed.ld + # Patch retarget.c so that g++ can find _exit symbol. cat <> ${DOWNLOADED_ETHOS_U_CORE_PLATFORM_PATH}/targets/corstone-300/retarget.c diff --git a/tensorflow/lite/micro/tools/make/ext_libs/ethos_u.inc b/tensorflow/lite/micro/tools/make/ext_libs/ethos_u.inc index c9b7548d20729c..e676b6cc2ae2c5 100644 --- a/tensorflow/lite/micro/tools/make/ext_libs/ethos_u.inc +++ b/tensorflow/lite/micro/tools/make/ext_libs/ethos_u.inc @@ -42,10 +42,15 @@ INCLUDES += -I$(ETHOSU_DRIVER_PATH)/include \ -I$(CMSIS_PATH)/CMSIS/Core/Include GENERATED_PROJECT_INCLUDES += -I./$(ETHOSU_DRIVER_PATH)/include -ETHOSU_LOG_SEVERITY := ETHOSU_LOG_INFO +ETHOSU_LOG_SEVERITY := ETHOSU_LOG_WARN CCFLAGS += -DETHOSU_LOG_SEVERITY=$(ETHOSU_LOG_SEVERITY) # TODO(#47718): resolve warnings. 
CCFLAGS += \ -Wno-return-type \ -Wno-format + +ifeq ($(TOOLCHAIN), gcc) + CCFLAGS += \ + -Wno-unused-but-set-variable +endif \ No newline at end of file diff --git a/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc b/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc index 771413b29d1df5..2c85e3014f398e 100644 --- a/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc +++ b/tensorflow/lite/micro/tools/make/targets/cortex_m_corstone_300_makefile.inc @@ -151,9 +151,6 @@ INCLUDES += \ -I$(CMSIS_PATH)/Device/ARM/$(ARM_CPU)/Include \ -I$(CMSIS_PATH)/CMSIS/Core/Include -# TODO(#47071): Examine why Micro benchmarks fails. -MICRO_LITE_BENCHMARKS := $(filter-out tensorflow/lite/micro/benchmarks/Makefile.inc, $(MICRO_LITE_BENCHMARKS)) - # TODO(#47070): Examine why some tests fail here. EXCLUDED_TESTS := \ tensorflow/lite/micro/micro_interpreter_test.cc \ From 0b515c381f9b3eb083a9052c6375e1b5c7bd7df0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5ns=20Nilsson?= Date: Fri, 21 May 2021 10:27:49 +0200 Subject: [PATCH 2/3] TFLM: Update benchmarks after review comments. --- tensorflow/lite/micro/benchmarks/README.md | 16 +++++++++------- tensorflow/lite/micro/micro_time.h | 6 +++--- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/tensorflow/lite/micro/benchmarks/README.md b/tensorflow/lite/micro/benchmarks/README.md index cdd7b6685ac784..e3901bdae280e7 100644 --- a/tensorflow/lite/micro/benchmarks/README.md +++ b/tensorflow/lite/micro/benchmarks/README.md @@ -11,6 +11,7 @@ platform. 
- [Run on x86](#run-on-x86) - [Run on Xtensa XPG Simulator](#run-on-xtensa-xpg-simulator) - [Run on Sparkfun Edge](#run-on-sparkfun-edge) +- [Run on FVP based on Arm Corstone-300 software](#run-on-fvp-based-on-arm-corstone-300-software) ## Keyword benchmark @@ -65,26 +66,27 @@ make -f tensorflow/lite/micro/tools/make/Makefile TARGET=sparkfun_edge person_de Refer to flashing instructions in the [Person Detection Example](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/person_detection/README.md#running-on-sparkfun-edge). -## Run on FVP based on Arm Corstone-300 software. -For more info about the Corstone-300 software see: tensorflow/lite/micro/cortex_m_corstone_300/README.md. +## Run on FVP based on Arm Corstone-300 software +For more info about the Corstone-300 software see: [tensorflow/lite/micro/cortex_m_corstone_300/README.md](../cortex_m_corstone_300/README.md). + + Disclaimer: Executing the benchmark test on the Corstone-300 software will provide a general metric of instructions executed. The estimates are not cycle accurate, however it aligns to instruction per cycle, and is a consistent environment. This means it can detect if code changes changed performance. -The person detection benchmark can also run with Ethos-U enabled, as the downloaded model will be optimized for Ethos-U. -For more info see: tensorflow/lite/micro/kernels/ethos_u/README.md. +The person detection benchmark can also run with Ethos-U enabled, as the downloaded model will be optimized for Ethos-U. For more info see: [tensorflow/lite/micro/kernels/ethos_u/README.md](../kernels/ethos_u/README.md). 
-To run the keyword benchmark on FVP, run +To run the keyword benchmark on FVP: ``` make -j -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_keyword_benchmark ``` -To run the person detection benchmark on FVP, run +To run the person detection benchmark on FVP: ``` make -j -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_person_detection_benchmark ``` -To run the person detection benchmark on FVP with Ethos-U, run +To run the person detection benchmark on FVP with Ethos-U: ``` make -j -f tensorflow/lite/micro/tools/make/Makefile CO_PROCESSOR=ethos_u TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_person_detection_benchmark diff --git a/tensorflow/lite/micro/micro_time.h b/tensorflow/lite/micro/micro_time.h index 79d10078d3499c..fac9069b1a7bdc 100644 --- a/tensorflow/lite/micro/micro_time.h +++ b/tensorflow/lite/micro/micro_time.h @@ -26,9 +26,9 @@ int32_t ticks_per_second(); // Return time in ticks. The meaning of a tick varies per platform. int32_t GetCurrentTimeTicks(); -inline uint32_t TicksToMs(uint32_t ticks) { - return static_cast(1000.0f * static_cast(ticks) / - static_cast(ticks_per_second())); +inline int32_t TicksToMs(int32_t ticks) { + return static_cast(1000.0f * static_cast(ticks) / + static_cast(ticks_per_second())); } } // namespace tflite From f57e079ba5d8d86e9e11c7a8c31d990078320ccc Mon Sep 17 00:00:00 2001 From: Advait Jain Date: Thu, 27 May 2021 22:03:35 -0700 Subject: [PATCH 3/3] Move timer initialization to the init function. 
--- .../micro/cortex_m_corstone_300/system_setup.cc | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc b/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc index ab4b2ef02a6048..dc2178ee340463 100644 --- a/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc +++ b/tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc @@ -63,17 +63,7 @@ constexpr int kClocksPerSecond = 25e6; int32_t ticks_per_second() { return kClocksPerSecond; } -int32_t GetCurrentTimeTicks() { - static bool is_initialized = false; - if (!is_initialized) { - KIN1_UnlockAccessToDWT(); - KIN1_InitCycleCounter(); - KIN1_ResetCycleCounter(); - KIN1_EnableCycleCounter(); - is_initialized = true; - } - return KIN1_GetCycleCounter(); -} +int32_t GetCurrentTimeTicks() { return KIN1_GetCycleCounter(); } #ifdef ETHOS_U void ethosuIrqHandler0() { ethosu_irq_handler(); } @@ -86,6 +76,11 @@ void uart_init(void); void InitializeTarget() { uart_init(); + KIN1_UnlockAccessToDWT(); + KIN1_InitCycleCounter(); + KIN1_ResetCycleCounter(); + KIN1_EnableCycleCounter(); + #ifdef ETHOS_U constexpr int ethosu_base_address = 0x48102000; constexpr int ethosu_irq = 56;