3 changes: 1 addition & 2 deletions .ci/scripts/test_llama.sh
@@ -156,8 +156,7 @@ cmake_install_executorch_libraries() {
      -DCMAKE_INSTALL_PREFIX=cmake-out \
      -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
      -DEXECUTORCH_BUILD_QNN="$QNN" \
-     -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
-     -Bcmake-out .
+     -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
  cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
}

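For context on the `-Bcmake-out .` removal above: modern CMake offers two equivalent ways to select a build tree — an explicit `-S`/`-B` pair, or a configure preset that declares `binaryDir`, which makes `-B` redundant. A generic sketch of the two styles (not this script's exact command line, whose leading lines are truncated above):

```
# Explicit out-of-source configure: source dir ".", build tree "cmake-out".
cmake -S . -B cmake-out -DCMAKE_BUILD_TYPE=Release

# Equivalent when a configure preset supplies the source and build dirs:
cmake --preset llm
cmake --build cmake-out -j9 --target install
```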
22 changes: 3 additions & 19 deletions examples/models/llama/README.md
@@ -269,33 +269,17 @@ You can export and run the original Llama 3 8B instruct model.

1. Build executorch with optimized CPU performance as follows. Build options available [here](https://github.com/pytorch/executorch/blob/main/CMakeLists.txt#L59).
```
-cmake -DPYTHON_EXECUTABLE=python \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
-    -DEXECUTORCH_ENABLE_LOGGING=1 \
-    -DCMAKE_BUILD_TYPE=Release \
-    -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-    -DEXECUTORCH_BUILD_XNNPACK=ON \
-    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-    -Bcmake-out .
+cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out

cmake --build cmake-out -j16 --target install --config Release
```
Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the Common Issues and Mitigations section below for solutions.

2. Build llama runner.
```
-cmake -DPYTHON_EXECUTABLE=python \
-    -DCMAKE_INSTALL_PREFIX=cmake-out \
+cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
    -DBUILD_TESTING=OFF \
    -DCMAKE_BUILD_TYPE=Release \
    -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
    -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
    -DEXECUTORCH_BUILD_XNNPACK=ON \
    -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
    -DSUPPORT_REGEX_LOOKAHEAD=ON \
    -Bcmake-out/examples/models/llama \
    examples/models/llama

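A quick aside on the `cmake --preset llm` workflow the README now recommends: configure presets are read from the repository's `CMakePresets.json` (or `CMakeUserPresets.json`) and require CMake 3.19 or newer, and cache variables passed on the command line take precedence over the values a preset sets. A small usage sketch, assuming it is run from the repository root:

```
# Enumerate the configure presets this checkout defines (CMake >= 3.19).
cmake --list-presets

# Configure with the llm preset; explicit -D flags override the preset's
# cache variables, as the README does for build type and install prefix.
cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out
cmake --build cmake-out -j16 --target install --config Release
```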
9 changes: 7 additions & 2 deletions extension/llm/runner/CMakeLists.txt
@@ -44,6 +44,10 @@ target_include_directories(
add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})

# add tokenizers
+set(SUPPORT_REGEX_LOOKAHEAD ON)
+# llama/runner/CMakeLists.txt builds a shared library libllama_runner.so that
+# transitively depends on tokenizers. Need to build tokenizers with -fPIC.
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${EXECUTORCH_ROOT}/extension/llm/tokenizers
${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/tokenizers
@@ -54,8 +58,9 @@ set(runner_deps executorch_core extension_module extension_tensor tokenizers)
target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})

target_include_directories(
-  extension_llm_runner INTERFACE ${_common_include_directories}
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+  extension_llm_runner
+  INTERFACE ${_common_include_directories}
+            ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
)

if(BUILD_TESTING)
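The `CMAKE_POSITION_INDEPENDENT_CODE` comment in the hunk above is worth illustrating: on ELF platforms, a static archive can only be linked into a shared library if its objects were compiled as position-independent code. A minimal standalone sketch of the pattern, with hypothetical target and file names:

```cmake
cmake_minimum_required(VERSION 3.19)
project(pic_demo CXX)

# Stands in for the tokenizers static library.
add_library(tokenizers_like STATIC tokenizer.cpp)
# Without PIC here (or a global CMAKE_POSITION_INDEPENDENT_CODE ON set before
# the target is defined), the shared link below fails on ELF platforms with
# "relocation ... can not be used when making a shared object; recompile with -fPIC".
set_target_properties(tokenizers_like PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Stands in for libllama_runner.so, which links the static archive in.
add_library(llama_runner_like SHARED runner.cpp)
target_link_libraries(llama_runner_like PRIVATE tokenizers_like)
```

As for `SUPPORT_REGEX_LOOKAHEAD`: LLM pre-tokenizer split patterns commonly contain lookahead terms (for example the `\s+(?!\S)` alternative in GPT-2's splitting regex), which not every regex engine supports, so forcing the option ON here presumably ensures the tokenizers build includes an engine that can evaluate them.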