pytorch · oscarandersson8218 · Aug 26, 2025 · Aug 21, 2025 · digantdesai · Aug 29, 2025
@@ -4,6 +4,8 @@
 # LICENSE file in the root directory of this source tree.
 
 function(fetch_ethos_u_content ETHOS_SDK_PATH ET_DIR_PATH)
+  message(STATUS "Fetching Ethos-U content into ${ETHOS_SDK_PATH}")
+
   file(MAKE_DIRECTORY ${ETHOS_SDK_PATH}/../ethos_u)
   include(FetchContent)
   set(ethos_u_base_tag "25.05")
@@ -55,7 +57,6 @@ function(fetch_ethos_u_content ETHOS_SDK_PATH ET_DIR_PATH)
       "pwd && source backends/arm/scripts/utils.sh && patch_repo ${ETHOS_SDK_PATH}/core_platform ${core_platform_base_rev} ${patch_dir}"
     WORKING_DIRECTORY ${ET_DIR_PATH} COMMAND_ECHO STDOUT
   )
-
 endfunction()
 
 function(add_corstone_subdirectory SYSTEM_CONFIG ETHOS_SDK_PATH)

@@ -18,13 +18,12 @@
    "source": [
     "# Ethos-U delegate flow example\n",
     "\n",
-    "This guide demonstrates the full flow for running a module on Arm Ethos-U using ExecuTorch. \n",
+    "This guide demonstrates the full flow for running a module on Arm Ethos-U55 using ExecuTorch.\n",
     "Tested on Linux x86_64 and macOS aarch64. If something is not working for you, please raise a GitHub issue and tag Arm.\n",
     "\n",
     "Before you begin:\n",
     "1. (In a clean virtual environment with a compatible Python version) Install executorch using `./install_executorch.sh`\n",
     "2. Install Arm cross-compilation toolchain and simulators using `./examples/arm/setup.sh --i-agree-to-the-contained-eula`\n",
-    "3. Add Arm cross-compilation toolchain and simulators to PATH using `./examples/arm/ethos-u-scratch/setup_path.sh` \n",
     "\n",
     "With all commands executed from the base `executorch` folder.\n",
     "\n",
@@ -70,7 +69,7 @@
    "source": [
     "To run on Ethos-U the `graph_module` must be quantized using the `arm_quantizer`. Quantization can be done in multiple ways and it can be customized for different parts of the graph; shown here is the recommended path for the EthosUBackend. Quantization also requires calibrating the module with example inputs.\n",
     "\n",
-    "Again printing the module, it can be seen that the quantization wraps the node in quantization/dequantization nodes which contain the computed quanitzation parameters.",
+    "Again printing the module, it can be seen that the quantization wraps the node in quantization/dequantization nodes which contain the computed quantization parameters.\n",
     "\n",
     "With the default passes for the Arm Ethos-U backend, assuming the model lowers fully to the Ethos-U, the exported program is composed of a Quantize node, Ethos-U custom delegate and a Dequantize node. In some circumstances, you may want to feed quantized input to the Neural Network straight away, e.g. if you have a camera sensor outputting (u)int8 data and keep all the arithmetic of the application in the int8 domain. For these cases, you can apply the `exir/passes/quantize_io_pass.py`. See the unit test in `backends/arm/test/passes/test_ioquantization_pass.py`for an example how to feed quantized inputs and obtain quantized outputs.\n"
    ]
@@ -88,13 +87,11 @@
     ")\n",
     "from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e\n",
     "\n",
-    "target = \"ethos-u55-128\"\n",
-    "\n",
     "# Create a compilation spec describing the target for configuring the quantizer\n",
     "# Some args are used by the Arm Vela graph compiler later in the example. Refer to Arm Vela documentation for an\n",
     "# explanation of its flags: https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-vela/-/blob/main/OPTIONS.md\n",
     "spec_builder = ArmCompileSpecBuilder().ethosu_compile_spec(\n",
-    "            target,\n",
+    "            target=\"ethos-u55-128\",\n",
     "            system_config=\"Ethos_U55_High_End_Embedded\",\n",
     "            memory_mode=\"Shared_Sram\",\n",
     "            extra_flags=\"--output-format=raw --debug-force-regor\"\n",
@@ -139,7 +136,6 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import os\n",
     "from executorch.backends.arm.ethosu import EthosUPartitioner\n",
     "from executorch.exir import (\n",
     "    EdgeCompileConfig,\n",
@@ -165,15 +161,10 @@
     "            config=ExecutorchBackendConfig(extract_delegate_segments=False)\n",
     "        )\n",
     "\n",
-    "executorch_program_manager.exported_program().module().print_readable()\n",
+    "_ = executorch_program_manager.exported_program().module().print_readable()\n",
     "\n",
     "# Save pte file\n",
-    "cwd_dir = os.getcwd()\n",
-    "pte_base_name = \"simple_example\"\n",
-    "pte_name = pte_base_name + \".pte\"\n",
-    "pte_path = os.path.join(cwd_dir, pte_name)\n",
-    "save_pte_program(executorch_program_manager, pte_name)\n",
-    "assert os.path.exists(pte_path), \"Build failed; no .pte-file found\""
+    "save_pte_program(executorch_program_manager, \"ethos_u_minimal_example.pte\")"
    ]
   },
   {
@@ -183,7 +174,7 @@
     "## Build executor runtime\n",
     "\n",
     "After the AOT compilation flow is done, the runtime can be cross compiled and linked to the produced .pte-file using the Arm cross-compilation toolchain. This is done in two steps:\n",
-    "1. Build and install the executorch library and EthosUDelegate.\n",
+    "1. Build and install the executorch libraries and EthosUDelegate.\n",
     "2. Build and link the `arm_executor_runner` and generate kernel bindings for any non delegated ops."
    ]
   },
@@ -193,22 +184,37 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import subprocess\n",
-    "\n",
-    "# Setup paths\n",
-    "et_dir = os.path.join(cwd_dir, \"..\", \"..\")\n",
-    "et_dir = os.path.abspath(et_dir)\n",
-    "script_dir = os.path.join(et_dir, \"backends\", \"arm\", \"scripts\")\n",
-    "\n",
-    "# Cross-compile executorch \n",
-    "subprocess.run(os.path.join(script_dir, \"build_executorch.sh\"), shell=True, cwd=et_dir)\n",
-    "\n",
-    "# Cross-compile executorch runner\n",
-    "args = f\"--pte={pte_path} --target={target}\"\n",
-    "subprocess.run(os.path.join(script_dir, \"build_executor_runner.sh\") + \" \" + args, shell=True, cwd=et_dir)\n",
-    "\n",
-    "elf_path = os.path.join(cwd_dir, pte_base_name, \"cmake-out\", \"arm_executor_runner\")\n",
-    "assert os.path.exists(elf_path), \"Build failed; no .elf-file found\""
+    "%%bash\n",
+    "# Ensure the arm-none-eabi-gcc toolchain and FVPs are available on $PATH\n",
+    "source ethos-u-scratch/setup_path.sh\n",
+    "\n",
+    "# Build executorch libraries cross-compiled for arm baremetal to executorch/cmake-out-arm\n",
+    "cmake --preset arm-baremetal \\\n",
+    "-DCMAKE_BUILD_TYPE=Release \\\n",
+    "-B../../cmake-out-arm ../..\n",
+    "cmake --build ../../cmake-out-arm --target install -j$(nproc) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%bash \n",
+    "source ethos-u-scratch/setup_path.sh\n",
+    "\n",
+    "# Build example executor runner application to examples/arm/ethos_u_minimal_example\n",
         backends/arm/test/test_arm_ootb.sh 
         backends/arm/test/test_arm_ootb.sh 
+    "cmake -DCMAKE_TOOLCHAIN_FILE=$(pwd)/ethos-u-setup/arm-none-eabi-gcc.cmake \\\n",
+    "      -DCMAKE_BUILD_TYPE=Release \\\n",
+    "      -DET_PTE_FILE_PATH=ethos_u_minimal_example.pte \\\n",
+    "      -DTARGET_CPU=cortex-m55 \\\n",
+    "      -DETHOSU_TARGET_NPU_CONFIG=ethos-u55-128 \\\n",
+    "      -DMEMORY_MODE=Shared_Sram \\\n",
+    "      -DSYSTEM_CONFIG=Ethos_U55_High_End_Embedded \\\n",
+    "      -Bethos_u_minimal_example \\\n",
+    "      executor_runner\n",
+    "cmake --build ethos_u_minimal_example -j$(nproc) -- arm_executor_runner"
    ]
   },
   {
@@ -217,7 +223,7 @@
    "source": [
     "# Run on simulated model\n",
     "\n",
-    "We can finally use the `backends/arm/scripts/run_fvp.sh` utility script to run the .elf-file on simulated Arm hardware. This Script runs the model with an input of ones, so the expected result of the addition should be close to 2."
+    "We can finally use the `backends/arm/scripts/run_fvp.sh` utility script to run the .elf-file on simulated Arm hardware. The example application is by default built with an input of ones, so the expected result of the quantized addition should be close to 2."
    ]
   },
   {
@@ -226,8 +232,11 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "args = f\"--elf={elf_path}  --target={target}\"\n",
-    "subprocess.run(os.path.join(script_dir, \"run_fvp.sh\") + \" \" + args, shell=True, cwd=et_dir)"
+    "%%bash \n",
+    "source ethos-u-scratch/setup_path.sh\n",
+    "\n",
+    "# Run the example\n",
+    "../../backends/arm/scripts/run_fvp.sh --elf=ethos_u_minimal_example/arm_executor_runner --target=ethos-u55-128"
    ]
   }
  ],

@@ -35,12 +35,12 @@ endif()
 
 # Example ExecuTorch demo for bare metal Cortex-M based systems
 set(ET_DIR_PATH
-    "../../.."
+    "${CMAKE_CURRENT_SOURCE_DIR}/../../.."
     CACHE PATH "Path to ExecuTorch dir"
 )
 set(ET_BUILD_DIR_PATH
-    "${ET_DIR_PATH}/cmake-out"
-    CACHE PATH "Path to ExecuTorch build dir"
+    "${ET_DIR_PATH}/cmake-out-arm"
+    CACHE PATH "Path to ExecuTorch build/install dir"
 )
 set(ET_INCLUDE_PATH
     "${ET_DIR_PATH}/.."
@@ -83,14 +83,6 @@ message(STATUS "SYSTEM_CONFIG is ${SYSTEM_CONFIG}")
 message(STATUS "MEMORY_MODE is ${MEMORY_MODE}")
 message(STATUS "ET_NUM_INFERENCES is ${ET_NUM_INFERENCES}")
 
-get_filename_component(ET_BUILD_DIR_PATH ${ET_BUILD_DIR_PATH} REALPATH)
-get_filename_component(ET_DIR_PATH ${ET_DIR_PATH} REALPATH)
-get_filename_component(ET_INCLUDE_PATH ${ET_INCLUDE_PATH} REALPATH)
-get_filename_component(ETHOS_SDK_PATH ${ETHOS_SDK_PATH} REALPATH)
-if(NOT ${SEMIHOSTING})
-  get_filename_component(ET_PTE_FILE_PATH ${ET_PTE_FILE_PATH} REALPATH)
-endif()
-
 # By default, use 2MB of temporary scratch buffer For Dedicated_Sram, use 64MB
 # for the temporary scratch buffer and 384KB for the fast scratch buffer(the
 # cache, applicable only for Ethos-U65 and Ethos-U85)