Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 74 additions & 12 deletions examples/raspberry_pi/pico2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -125,21 +125,83 @@ target_compile_options(

set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gc-sections")

set(BAREMETAL_BUILD_DIR ${EXECUTORCH_ROOT}/executorch/cmake-out/)
# Path to the cross-compiled ExecuTorch install tree. Declared as a cache
# entry so it can be overridden with -DBAREMETAL_BUILD_DIR=<path>.
set(BAREMETAL_BUILD_DIR
${EXECUTORCH_ROOT}/executorch/cmake-out/
CACHE STRING "ExecuTorch baremetal build dir"
)

# Link ExecuTorch and Pico libraries
target_link_libraries(
executorch_pico
PRIVATE ${BAREMETAL_BUILD_DIR}/lib/libexecutorch.a
${BAREMETAL_BUILD_DIR}/lib/libexecutorch_core.a
-Wl,--whole-archive
${BAREMETAL_BUILD_DIR}/lib/libportable_ops_lib.a
-Wl,--no-whole-archive
${BAREMETAL_BUILD_DIR}/lib/libportable_kernels.a
pico_stdlib
pico_stdio_usb
# CMSIS-NN support: link quantized cortex_m kernels instead of portable ops
option(USE_CMSIS_NN "Link CMSIS-NN INT8 kernels for Cortex-M33 acceleration"
OFF
)
# Selective build: link only the kernels the embedded model actually uses,
# shrinking the firmware image versus linking the full portable op library.
option(USE_SELECTIVE_BUILD "Use selective build (only link model-required ops)"
OFF
)

# Select the operator libraries to link, then issue a single link command.
#
#   USE_CMSIS_NN        -> link cortex_m ops/kernels plus CMSIS-NN for INT8
#                          acceleration on the Cortex-M33.
#   USE_SELECTIVE_BUILD -> link only model-required ops, keeping the full
#                          portable_ops_lib out of --whole-archive.
#
# Op-registration archives must sit inside --whole-archive so the linker does
# not garbage-collect their static kernel registrations.
if(USE_CMSIS_NN)
  message(STATUS "CMSIS-NN enabled: linking cortex_m_ops_lib + cmsis-nn")
  if(USE_SELECTIVE_BUILD)
    # CMSIS-NN model uses only cortex_m:: ops, no portable ops needed. Skip
    # --whole-archive on portable_ops_lib to avoid pulling unused ops.
    message(STATUS "Selective build: CMSIS-NN only (no portable ops)")
    set(et_whole_archive_libs
        ${BAREMETAL_BUILD_DIR}/lib/libcortex_m_ops_lib.a
    )
  else()
    set(et_whole_archive_libs
        ${BAREMETAL_BUILD_DIR}/lib/libcortex_m_ops_lib.a
        ${BAREMETAL_BUILD_DIR}/lib/libportable_ops_lib.a
    )
  endif()
  set(et_kernel_libs
      ${BAREMETAL_BUILD_DIR}/lib/libcortex_m_kernels.a
      ${BAREMETAL_BUILD_DIR}/lib/libportable_kernels.a
      ${BAREMETAL_BUILD_DIR}/lib/libcmsis-nn.a
  )
else()
  if(USE_SELECTIVE_BUILD)
    message(STATUS "Selective build: using executorch_selected_kernels")
    set(et_whole_archive_libs
        ${BAREMETAL_BUILD_DIR}/lib/libexecutorch_selected_kernels.a
    )
  else()
    set(et_whole_archive_libs
        ${BAREMETAL_BUILD_DIR}/lib/libportable_ops_lib.a
    )
  endif()
  set(et_kernel_libs ${BAREMETAL_BUILD_DIR}/lib/libportable_kernels.a)
endif()

# Link ExecuTorch and Pico libraries (shared by all four configurations).
target_link_libraries(
  executorch_pico
  PRIVATE ${BAREMETAL_BUILD_DIR}/lib/libexecutorch.a
          ${BAREMETAL_BUILD_DIR}/lib/libexecutorch_core.a
          -Wl,--whole-archive
          ${et_whole_archive_libs}
          -Wl,--no-whole-archive
          ${et_kernel_libs}
          pico_stdlib
          pico_stdio_usb
)

# Only add extra outputs if the target builds successfully
if(TARGET executorch_pico)
pico_add_extra_outputs(executorch_pico)
Expand Down
67 changes: 58 additions & 9 deletions examples/raspberry_pi/pico2/build_firmware_pico.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

#!/bin/bash
# build_firmware_pico.sh
# Simple script to cross-compile ExecuTorch and build Pico2 firmware with optional model input

Expand All @@ -17,26 +16,61 @@ PICO2_DIR="${ROOT_DIR}/examples/raspberry_pi/pico2"
BUILD_DIR="${PICO2_DIR}/build"
EXECUTORCH_BUILD_DIR="${ROOT_DIR}/cmake-out"

# Pico SDK 2.0's mbedtls requires this for CMake >= 3.30
export CMAKE_POLICY_VERSION_MINIMUM=3.5

# Portable CPU count: nproc (Linux/coreutils) with a sysctl fallback (macOS/BSD).
if command -v nproc >/dev/null 2>&1; then
  NPROC="$(nproc)"
else
  NPROC="$(sysctl -n hw.ncpu)"
fi

# Source ARM toolchain if available and not already on PATH
if ! command -v arm-none-eabi-gcc &>/dev/null; then
# Preferred: the setup_path.sh script installed by examples/arm/setup.sh.
SETUP_PATH="${ROOT_DIR}/examples/arm/arm-scratch/setup_path.sh"
if [ -f "${SETUP_PATH}" ]; then
source "${SETUP_PATH}"
else
# Try to find the toolchain directly
TOOLCHAIN_BIN=$(find "${ROOT_DIR}/examples/arm/arm-scratch" -name "arm-none-eabi-gcc" -type f 2>/dev/null | head -1)
if [ -n "${TOOLCHAIN_BIN:-}" ]; then
# Prepend the toolchain's bin directory so cmake picks up the cross compiler.
export PATH="$(dirname "${TOOLCHAIN_BIN}"):${PATH}"
else
echo "Error: arm-none-eabi-gcc not found. Run: ./examples/arm/setup.sh --i-agree-to-the-contained-eula"
exit 1
fi
fi
fi

echo "Using ARM toolchain: $(which arm-none-eabi-gcc)"

# Default model
DEFAULT_MODEL="default_model.pte"

# Print CLI usage and exit non-zero; called on unknown arguments.
# (Drops the stale pre---cmsis usage line that duplicated the first echo.)
usage() {
  echo "Usage: $0 [--clean] [--cmsis] [--model=path/to/model.pte]"
  echo " --clean Clean build directories"
  echo " --cmsis Build with CMSIS-NN INT8 kernels (requires cortex_m backend)"
  echo " --model=FILE Specify model file to embed (relative to pico2/)"
  exit 1
}

# Parse args
MODEL_INPUT=""
CLEAN_BUILD=0
USE_CMSIS=0

for arg in "$@"; do
case $arg in
--clean)
CLEAN_BUILD=1
shift
;;
--cmsis)
USE_CMSIS=1
shift
;;
--model=*)
MODEL_INPUT="${arg#*=}"
shift
Expand Down Expand Up @@ -68,42 +102,57 @@ if [ -n "$MODEL_INPUT" ] && [ -f "${PICO2_DIR}/${MODEL_INPUT}" ]; then
echo "Using selective build from model: ${MODEL_ABS_PATH}"
fi

# Extra ExecuTorch configure flags enabled by --cmsis.
CMSIS_FLAGS=()
if [ "${USE_CMSIS}" -eq 1 ]; then
  echo "CMSIS-NN mode: building with Cortex-M backend and CMSIS-NN kernels"
  CMSIS_FLAGS=(-DEXECUTORCH_BUILD_CORTEX_M=ON)
fi

# Step 1: cross-compile the ExecuTorch runtime for the RP2350 (Cortex-M33).
# Removes stale diff-residue lines: the superseded -DTARGET_CPU=cortex-m0plus
# entry and the old `-j$(nproc)` build invocation (NPROC is computed portably
# above).
cmake -B "${EXECUTORCH_BUILD_DIR}" \
  -DCMAKE_TOOLCHAIN_FILE="${ROOT_DIR}/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake" \
  -DTARGET_CPU=cortex-m33+nofp \
  -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON \
  -DEXECUTORCH_PAL_DEFAULT=minimal \
  -DEXECUTORCH_DTYPE_SELECTIVE_BUILD=ON \
  -DCMAKE_BUILD_TYPE=MinSizeRel \
  -DEXECUTORCH_ENABLE_LOGGING=OFF \
  -DEXECUTORCH_SELECT_ALL_OPS=OFF \
  -DEXECUTORCH_BUILD_EXECUTOR_RUNNER=OFF \
  -DCMAKE_INSTALL_PREFIX="${EXECUTORCH_BUILD_DIR}" \
  ${SELECT_OPS_FLAGS} \
  ${CMSIS_FLAGS[@]+"${CMSIS_FLAGS[@]}"} \
  "${ROOT_DIR}"

cmake --build "${EXECUTORCH_BUILD_DIR}" --target install -j${NPROC}

echo "ExecuTorch cross compile complete."

# Step 2: Build firmware for Pico2 with model input

cd "${PICO2_DIR}"

# Base firmware configure flags; extended below per build mode.
PICO_CMAKE_FLAGS=(-DPICO_BOARD=pico2 -DCMAKE_BUILD_TYPE=Release)

if [ "${USE_CMSIS}" -eq 1 ]; then
  PICO_CMAKE_FLAGS+=(-DUSE_CMSIS_NN=ON)
fi

# Propagate selective build to the firmware link step; otherwise the firmware
# still links the full portable_ops_lib under --whole-archive and the
# binary-size benefit of -DEXECUTORCH_SELECT_OPS_MODEL is lost.
if [ -n "${SELECT_OPS_FLAGS:-}" ]; then
  PICO_CMAKE_FLAGS+=(-DUSE_SELECTIVE_BUILD=ON)
fi

Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The script enables ExecuTorch selective build via -DEXECUTORCH_SELECT_OPS_MODEL, but the Pico2 firmware CMake is never told to link the selective kernel library (USE_SELECTIVE_BUILD option added in CMakeLists.txt). As a result, the firmware still links the full portable_ops_lib under --whole-archive, negating the binary-size benefits of selective build. Consider propagating this automatically (e.g., add -DUSE_SELECTIVE_BUILD=ON when SELECT_OPS_FLAGS is set) so selective build works end-to-end.

Suggested change
if [ -n "${SELECT_OPS_FLAGS:-}" ]; then
PICO_CMAKE_FLAGS+=(-DUSE_SELECTIVE_BUILD=ON)
fi

Copilot uses AI. Check for mistakes.
# Step 2: select the model to embed, then configure and build the firmware.
# Removes the stale duplicate `cmake -B` invocations left beside the new
# PICO_CMAKE_FLAGS-based configure call.
if [ -n "$MODEL_INPUT" ]; then
  # Use specified model
  if [ ! -f "${MODEL_INPUT}" ]; then
    echo "Error: Specified model file '${MODEL_INPUT}' not found in pico2 directory."
    exit 1
  fi
  echo "Building firmware with model: ${MODEL_INPUT}"
  PICO_CMAKE_FLAGS+=(-DINPUT_MODEL="./${MODEL_INPUT}")
else
  # Use default model
  echo "Building firmware with default model: ${DEFAULT_MODEL}"
  PICO_CMAKE_FLAGS+=(-DINPUT_MODEL="./${DEFAULT_MODEL}")
fi

cmake -B "${BUILD_DIR}" "${PICO_CMAKE_FLAGS[@]}"

cmake --build "${BUILD_DIR}" -j${NPROC}

echo "Firmware build complete. Output in ${BUILD_DIR}, Binary: executorch_pico.uf2"
159 changes: 159 additions & 0 deletions examples/raspberry_pi/pico2/export_mlp_mnist_cmsis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Export the TinyMLP MNIST model with INT8 quantization for CMSIS-NN acceleration.

Uses the CortexMQuantizer to produce INT8 quantized ops that map to CMSIS-NN
kernels on Cortex-M33 (RP2350/Pico2). The model I/O stays float — quantize and
dequantize nodes are inserted inside the graph.

Usage:
python export_mlp_mnist_cmsis.py
python export_mlp_mnist_cmsis.py --output my_model.pte
python export_mlp_mnist_cmsis.py --num-calibration 200
"""

import argparse
import logging
import os

import torch

from executorch.backends.cortex_m.passes.cortex_m_pass_manager import CortexMPassManager
from executorch.backends.cortex_m.quantizer.quantizer import CortexMQuantizer
from executorch.exir import (
    EdgeCompileConfig,
    EdgeProgramManager,
    ExecutorchBackendConfig,
    to_edge,
)
from executorch.extension.export_util.utils import save_pte_program

from export_mlp_mnist import create_balanced_model, IMAGE_SIZE, test_comprehensive
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e

FORMAT = "[%(levelname)s %(asctime)s %(filename)s:%(lineno)s] %(message)s"
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)


def get_calibration_data(num_samples: int = 100):
    """
    Generate calibration inputs for quantization.

    Half of the samples are structured stroke patterns resembling digits and
    the rest are random binary noise, so the observers see a representative
    activation range.
    """
    structured = num_samples // 2
    samples = []

    # Digit-like images: one random vertical and one horizontal stroke each.
    for _ in range(structured):
        img = torch.zeros(1, IMAGE_SIZE, IMAGE_SIZE)
        col = torch.randint(5, 23, (1,)).item()
        row = torch.randint(5, 23, (1,)).item()
        img[0, 2:26, col - 1 : col + 2] = 1.0  # vertical stroke
        img[0, row - 1 : row + 2, 5:23] = 1.0  # horizontal stroke
        samples.append(img)

    # Sparse random binary-pixel images (~30% of pixels on).
    for _ in range(num_samples - structured):
        samples.append((torch.rand(1, IMAGE_SIZE, IMAGE_SIZE) > 0.7).float())

    return samples


def quantize_model(model, calibration_data):
    """
    INT8-quantize `model` via the PT2E flow with the CortexMQuantizer.

    Returns a tuple of (quantized graph module, example input tensor).
    """
    quantizer = CortexMQuantizer()
    sample = calibration_data[0]

    # Export to a graph module and insert observers.
    graph_module = torch.export.export(model, (sample,)).module()
    observed = prepare_pt2e(graph_module, quantizer)

    # Feed calibration data through the observed graph to collect ranges.
    logger.info(f"Calibrating with {len(calibration_data)} samples...")
    with torch.no_grad():
        for idx, batch in enumerate(calibration_data, start=1):
            observed(batch)
            if idx % 25 == 0:
                logger.info(f" Calibrated {idx}/{len(calibration_data)} samples")

    return convert_pt2e(observed), sample


def export_to_pte(quantized_model, example_input, output_path: str):
    """
    Lower the quantized model to ExecuTorch format and save it as a .pte file.

    Args:
        quantized_model: Graph module produced by convert_pt2e().
        example_input: Example tensor used for torch.export tracing.
        output_path: Destination .pte file path.
    """
    exported_program = torch.export.export(quantized_model, (example_input,))

    edge_config = EdgeCompileConfig(
        _check_ir_validity=False,
        # Keep aten.linear intact so the Cortex-M passes can pattern-match it.
        preserve_ops=[torch.ops.aten.linear.default],
    )
    edge_program = to_edge(exported_program, compile_config=edge_config)
    logger.info("Edge program created")

    logger.info("Applying Cortex-M optimization passes...")
    pass_manager = CortexMPassManager(edge_program.exported_program())
    transformed_ep = pass_manager.transform()

    # transform() returns an ExportedProgram that is already in Edge dialect,
    # so wrap it in an EdgeProgramManager directly rather than calling
    # to_edge() again, which would re-run ATen->Edge conversion passes on an
    # Edge-dialect program.
    edge_program = EdgeProgramManager(transformed_ep, compile_config=edge_config)

    logger.info("Converting to ExecuTorch format...")
    exec_program = edge_program.to_executorch(
        config=ExecutorchBackendConfig(extract_delegate_segments=False)
    )

    save_pte_program(exec_program, output_path)
    file_size = os.path.getsize(output_path)
    logger.info(f"Model saved to {output_path} ({file_size / 1024:.1f} KB)")


def main():
    """Command-line driver: build, quantize, smoke-test, and export the model."""
    parser = argparse.ArgumentParser(
        description="Export TinyMLP MNIST for Cortex-M with CMSIS-NN (INT8)"
    )
    parser.add_argument(
        "--output",
        type=str,
        default="balanced_tiny_mlp_mnist_cmsis.pte",
        help="Output .pte file path",
    )
    parser.add_argument(
        "--num-calibration",
        type=int,
        default=100,
        help="Number of calibration samples for quantization",
    )
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")
    args = parser.parse_args()

    if args.debug:
        logging.getLogger().setLevel(logging.DEBUG)

    logger.info("Creating balanced MLP MNIST model...")
    model = create_balanced_model()
    model.eval()

    logger.info("Testing FP32 model before quantization:")
    test_comprehensive(model)

    calibration_data = get_calibration_data(args.num_calibration)
    quantized_model, example_input = quantize_model(model, calibration_data)

    # Smoke test: a vertical bar roughly where a hand-drawn "1" would sit.
    logger.info("Testing quantized model:")
    with torch.no_grad():
        probe = torch.zeros(1, IMAGE_SIZE, IMAGE_SIZE)
        probe[0, 2:26, 13:16] = 1.0  # digit-1-like pattern
        prediction = quantized_model(probe).argmax(dim=1).item()
        logger.info(f" Digit-1 pattern -> predicted: {prediction}")

    export_to_pte(quantized_model, example_input, args.output)
    logger.info("Export complete!")
    logger.info(f"Input shape: (1, {IMAGE_SIZE}, {IMAGE_SIZE})")
    logger.info("Input format: Float [0.0, 1.0] (same as FP32 variant)")


if __name__ == "__main__":
    main()
Loading
Loading