Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TFLM: Enable FVP target in benchmarks #49144

Merged
merged 4 commits into from
Jun 1, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
10 changes: 9 additions & 1 deletion tensorflow/lite/micro/benchmarks/Makefile.inc
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,17 @@ tensorflow/lite/micro/benchmarks/micro_benchmark.h
PERSON_DETECTION_BENCHMARK_SRCS := \
tensorflow/lite/micro/benchmarks/person_detection_benchmark.cc \
$(MAKEFILE_DIR)/downloads/person_model_int8/no_person_image_data.cc \
$(MAKEFILE_DIR)/downloads/person_model_int8/person_detect_model_data.cc \
$(MAKEFILE_DIR)/downloads/person_model_int8/person_image_data.cc

ifeq ($(CO_PROCESSOR),ethos_u)
# Ethos-U uses a Vela-optimized version of the original model.
PERSON_DETECTION_BENCHMARK_SRCS += \
$(MAKEFILE_DIR)/downloads/person_model_int8/person_detect_model_data_vela.cc
else
PERSON_DETECTION_BENCHMARK_SRCS += \
$(MAKEFILE_DIR)/downloads/person_model_int8/person_detect_model_data.cc
endif

PERSON_DETECTION_BENCHMARK_HDRS := \
tensorflow/lite/micro/examples/person_detection/person_detect_model_data.h \
tensorflow/lite/micro/examples/person_detection/no_person_image_data.h \
Expand Down
27 changes: 27 additions & 0 deletions tensorflow/lite/micro/benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ platform.
- [Run on x86](#run-on-x86)
- [Run on Xtensa XPG Simulator](#run-on-xtensa-xpg-simulator)
- [Run on Sparkfun Edge](#run-on-sparkfun-edge)
- [Run on FVP based on Arm Corstone-300 software](#run-on-fvp-based-on-arm-corstone-300-software)

## Keyword benchmark

Expand Down Expand Up @@ -64,3 +65,29 @@ make -f tensorflow/lite/micro/tools/make/Makefile TARGET=sparkfun_edge person_de

Refer to flashing instructions in the [Person Detection Example](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/lite/micro/examples/person_detection/README.md#running-on-sparkfun-edge).


## Run on FVP based on Arm Corstone-300 software
For more info about the Corstone-300 software see: [tensorflow/lite/micro/cortex_m_corstone_300/README.md](../cortex_m_corstone_300/README.md).


Disclaimer: Executing the benchmark test on the Corstone-300 software will provide a general metric of instructions executed. The estimates are not cycle accurate; however, they align to instructions per cycle, and the environment is consistent. This means the benchmark can detect whether a code change affected performance.

The person detection benchmark can also run with Ethos-U enabled, as the downloaded model will be optimized for Ethos-U. For more info see: [tensorflow/lite/micro/kernels/ethos_u/README.md](../kernels/ethos_u/README.md).

To run the keyword benchmark on FVP:

```
make -j -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_keyword_benchmark
```

To run the person detection benchmark on FVP:

```
make -j -f tensorflow/lite/micro/tools/make/Makefile TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_person_detection_benchmark
```

To run the person detection benchmark on FVP with Ethos-U:

```
make -j -f tensorflow/lite/micro/tools/make/Makefile CO_PROCESSOR=ethos_u TARGET=cortex_m_corstone_300 TARGET_ARCH=cortex-m55 run_person_detection_benchmark
```
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@ void PersonDetectionNIerations(const int8_t* input, int iterations,
PersonDetectionBenchmarkRunner& benchmark_runner,
MicroProfiler& profiler) {
benchmark_runner.SetInput(input);
int32_t ticks = 0;
uint32_t ticks = 0;
for (int i = 0; i < iterations; ++i) {
profiler.ClearEvents();
benchmark_runner.RunSingleIteration();
ticks += profiler.GetTotalTicks();
}
MicroPrintf("%s took %d ticks (%d ms)", tag, ticks, TicksToMs(ticks));
MicroPrintf("%s took %u ticks (%u ms)", tag, ticks, TicksToMs(ticks));
}

} // namespace tflite
Expand Down
21 changes: 21 additions & 0 deletions tensorflow/lite/micro/cortex_m_corstone_300/micro_time.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// This file is empty to ensure that a specialized implementation of
// micro_time.h is used (instead of the default implementation from
// tensorflow/lite/micro/micro_time.cc).
//
// The actual target-specific implementation of micro_time.h is in
// system_setup.cc since that allows us to consolidate all the target-specific
// specializations into one source file.
50 changes: 50 additions & 0 deletions tensorflow/lite/micro/cortex_m_corstone_300/system_setup.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,60 @@ limitations under the License.
#include CMSIS_DEVICE_ARM_CORTEX_M_XX_HEADER_FILE
#endif
#include "tensorflow/lite/micro/micro_error_reporter.h"
#include "tensorflow/lite/micro/micro_time.h"
#include "tensorflow/lite/micro/system_setup.h"

// DWT (Data Watchpoint and Trace) registers, only exists on ARM Cortex with a
// DWT unit. Each comment below describes the macro that directly follows it.

// DWT Control register.
#define KIN1_DWT_CONTROL (*((volatile uint32_t*)0xE0001000))

// CYCCNTENA bit in DWT_CONTROL register.
#define KIN1_DWT_CYCCNTENA_BIT (1UL << 0)

// DWT Cycle Counter register.
#define KIN1_DWT_CYCCNT (*((volatile uint32_t*)0xE0001004))

// DEMCR: Debug Exception and Monitor Control Register.
#define KIN1_DEMCR (*((volatile uint32_t*)0xE000EDFC))

// Trace enable bit in DEMCR register.
#define KIN1_TRCENA_BIT (1UL << 24)

// Register that KIN1_UnlockAccessToDWT() writes its key to.
// NOTE(review): presumably the DWT Lock Access Register - confirm against the
// Arm documentation.
#define KIN1_LAR (*((volatile uint32_t*)0xE0001FB0))

// Unlock access to DWT (ITM, etc.) registers.
// The expansion deliberately has no trailing semicolon: the caller supplies
// it, so the macro behaves like a single statement (e.g. in if/else).
#define KIN1_UnlockAccessToDWT() KIN1_LAR = 0xC5ACCE55

// TRCENA: Enable trace and debug block via DEMCR (Debug Exception and Monitor
// Control Register).
#define KIN1_InitCycleCounter() KIN1_DEMCR |= KIN1_TRCENA_BIT

// Writes zero to the cycle counter register.
#define KIN1_ResetCycleCounter() KIN1_DWT_CYCCNT = 0
// Sets the CYCCNTENA bit in the DWT control register.
#define KIN1_EnableCycleCounter() KIN1_DWT_CONTROL |= KIN1_DWT_CYCCNTENA_BIT
// Clears the CYCCNTENA bit in the DWT control register.
#define KIN1_DisableCycleCounter() KIN1_DWT_CONTROL &= ~KIN1_DWT_CYCCNTENA_BIT
// Reads the current value of the cycle counter register.
#define KIN1_GetCycleCounter() KIN1_DWT_CYCCNT

namespace tflite {

namespace {
// Tick rate returned by ticks_per_second(): 25,000,000 clocks per second.
// Spelled as an integer literal so no floating-point to int conversion is
// involved.
constexpr int kClocksPerSecond = 25000000;
}  // namespace

// Returns the tick rate of this target's time source, i.e. kClocksPerSecond.
int32_t ticks_per_second() {
  return kClocksPerSecond;
}

// Returns the current value of the DWT cycle counter as the tick count.
//
// The first call performs one-time setup, guarded by the function-local
// `is_initialized` flag: it unlocks DWT register access, sets the trace enable
// bit in DEMCR, writes zero to the cycle counter and then enables it. Later
// calls only read the counter.
//
// NOTE(review): the counter register is read as a uint32_t but returned as an
// int32_t, so readings above INT32_MAX cannot be represented as positive
// values - confirm that callers tolerate this.
int32_t GetCurrentTimeTicks() {
static bool is_initialized = false;
if (!is_initialized) {
KIN1_UnlockAccessToDWT();
KIN1_InitCycleCounter();
KIN1_ResetCycleCounter();
KIN1_EnableCycleCounter();
is_initialized = true;
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

apologies for missing this earlier. Let's move this to InitializeTarget and remove static bool is_inititalized

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went ahead and pushed a commit with this change.

return KIN1_GetCycleCounter();
}

#if defined(ETHOS_U)
// Only compiled when ETHOS_U is defined. Forwards the call straight to
// ethosu_irq_handler().
void ethosuIrqHandler0() {
  ethosu_irq_handler();
}
#endif  // defined(ETHOS_U)
Expand Down
2 changes: 2 additions & 0 deletions tensorflow/lite/micro/kernels/ethos_u/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ In order to run a test with Ethos-U55 enabled, a platform with corresponding har

On top of that, the .tflite model needs to be modified according to the subchapter "Ethos-U custom operator" above.

The log level of the Ethos-U driver can be set in the build command. For example: ETHOSU_LOG_SEVERITY=ETHOSU_LOG_INFO.

## Example using network tester
See tensorflow/lite/micro/examples/network_tester/README.md for more info.

Expand Down
15 changes: 9 additions & 6 deletions tensorflow/lite/micro/testing/test_with_arm_corstone_300.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,14 @@ FVP+='-C mps3_board.uart0.unbuffered_output=1 '
FVP+='-C mps3_board.uart0.shutdown_on_eot=1'
${FVP} ${BINARY_TO_TEST} | tee ${MICRO_LOG_FILENAME}

if grep -q "$PASS_STRING" ${MICRO_LOG_FILENAME}
if [[ ${2} != "non_test_binary" ]]
then
echo "$BINARY_TO_TEST: PASS"
exit 0
else
echo "$BINARY_TO_TEST: FAIL - '$PASS_STRING' not found in logs."
exit 1
if grep -q "$PASS_STRING" ${MICRO_LOG_FILENAME}
then
echo "$BINARY_TO_TEST: PASS"
exit 0
else
echo "$BINARY_TO_TEST: FAIL - '$PASS_STRING' not found in logs."
exit 1
fi
fi
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ else
sed -i '/rodata/d' ${LINKER_PATH}/platform_parsed.ld
sed -i 's/network_model_sec/\.rodata\*/' ${LINKER_PATH}/platform_parsed.ld

# Match tensor_arena even when it is declared inside a namespace, so that the tensor arena is placed in the SRAM region intended by the linker file.
sed -i 's/tensor_arena/\*tensor_arena\*/' ${LINKER_PATH}/platform_parsed.ld

# Patch retarget.c so that g++ can find _exit symbol.
cat <<EOT >> ${DOWNLOADED_ETHOS_U_CORE_PLATFORM_PATH}/targets/corstone-300/retarget.c

Expand Down
7 changes: 6 additions & 1 deletion tensorflow/lite/micro/tools/make/ext_libs/ethos_u.inc
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,15 @@ INCLUDES += -I$(ETHOSU_DRIVER_PATH)/include \
-I$(CMSIS_PATH)/CMSIS/Core/Include
GENERATED_PROJECT_INCLUDES += -I./$(ETHOSU_DRIVER_PATH)/include

ETHOSU_LOG_SEVERITY := ETHOSU_LOG_INFO
ETHOSU_LOG_SEVERITY := ETHOSU_LOG_WARN
CCFLAGS += -DETHOSU_LOG_SEVERITY=$(ETHOSU_LOG_SEVERITY)

# TODO(#47718): resolve warnings.
CCFLAGS += \
-Wno-return-type \
-Wno-format

ifeq ($(TOOLCHAIN), gcc)
CCFLAGS += \
-Wno-unused-but-set-variable
endif
Original file line number Diff line number Diff line change
Expand Up @@ -151,9 +151,6 @@ INCLUDES += \
-I$(CMSIS_PATH)/Device/ARM/$(ARM_CPU)/Include \
-I$(CMSIS_PATH)/CMSIS/Core/Include

# TODO(#47071): Examine why Micro benchmarks fails.
MICRO_LITE_BENCHMARKS := $(filter-out tensorflow/lite/micro/benchmarks/Makefile.inc, $(MICRO_LITE_BENCHMARKS))

# TODO(#47070): Examine why some tests fail here.
EXCLUDED_TESTS := \
tensorflow/lite/micro/micro_interpreter_test.cc \
Expand Down