diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 88fedabba27..9f183528719 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -156,8 +156,7 @@ cmake_install_executorch_libraries() {
         -DCMAKE_INSTALL_PREFIX=cmake-out \
         -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
         -DEXECUTORCH_BUILD_QNN="$QNN" \
-        -DQNN_SDK_ROOT="$QNN_SDK_ROOT" \
-        -Bcmake-out .
+        -DQNN_SDK_ROOT="$QNN_SDK_ROOT"
     cmake --build cmake-out -j9 --target install --config "$CMAKE_BUILD_TYPE"
 }
 
diff --git a/examples/models/llama/README.md b/examples/models/llama/README.md
index 38eb195fcd4..c6f0350fff7 100644
--- a/examples/models/llama/README.md
+++ b/examples/models/llama/README.md
@@ -269,18 +269,7 @@ You can export and run the original Llama 3 8B instruct model.
 
 1. Build executorch with optimized CPU performance as follows. Build options available [here](https://github.com/pytorch/executorch/blob/main/CMakeLists.txt#L59).
     ```
-    cmake -DPYTHON_EXECUTABLE=python \
-        -DCMAKE_INSTALL_PREFIX=cmake-out \
-        -DEXECUTORCH_ENABLE_LOGGING=1 \
-        -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
-        -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-        -Bcmake-out .
+    cmake --preset llm -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=cmake-out
 
     cmake --build cmake-out -j16 --target install --config Release
     ```
@@ -288,14 +277,9 @@ Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the
 2. Build llama runner.
     ```
-    cmake -DPYTHON_EXECUTABLE=python \
-        -DCMAKE_INSTALL_PREFIX=cmake-out \
+    cmake -DCMAKE_INSTALL_PREFIX=cmake-out \
+        -DBUILD_TESTING=OFF \
         -DCMAKE_BUILD_TYPE=Release \
-        -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-        -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
-        -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-        -DSUPPORT_REGEX_LOOKAHEAD=ON
         -Bcmake-out/examples/models/llama \
         examples/models/llama
     ```
 
diff --git a/extension/llm/runner/CMakeLists.txt b/extension/llm/runner/CMakeLists.txt
index 05f47e55c74..7f3f8ad1519 100644
--- a/extension/llm/runner/CMakeLists.txt
+++ b/extension/llm/runner/CMakeLists.txt
@@ -44,6 +44,10 @@ target_include_directories(
 add_library(extension_llm_runner STATIC ${_extension_llm_runner__srcs})
 
 # add tokenizers
+set(SUPPORT_REGEX_LOOKAHEAD ON)
+# llama/runner/CMakeLists.txt builds a shared library libllama_runner.so that
+# transitively depends on tokenizers. Need to build tokenizers with -fPIC.
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 add_subdirectory(
   ${EXECUTORCH_ROOT}/extension/llm/tokenizers
   ${CMAKE_CURRENT_BINARY_DIR}/../../../extension/llm/tokenizers
@@ -54,8 +58,9 @@ set(runner_deps executorch_core extension_module extension_tensor tokenizers)
 
 target_link_libraries(extension_llm_runner PUBLIC ${runner_deps})
 
 target_include_directories(
-  extension_llm_runner INTERFACE ${_common_include_directories}
-  ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
+  extension_llm_runner
+  INTERFACE ${_common_include_directories}
+            ${EXECUTORCH_ROOT}/extension/llm/tokenizers/include
 )
 if(BUILD_TESTING)
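
Note on the `extension/llm/runner/CMakeLists.txt` change: `CMAKE_POSITION_INDEPENDENT_CODE` must be set *before* the `add_subdirectory` call because the variable only initializes the `POSITION_INDEPENDENT_CODE` property of targets defined after it, and static-library objects are not compiled with `-fPIC` by default, so folding them into a shared library fails at link time on ELF platforms. Below is a minimal standalone sketch of that constraint; the `toy_tokenizer`/`toy_runner` targets and source files are hypothetical stand-ins, not part of this PR.

```cmake
cmake_minimum_required(VERSION 3.10)
project(pic_demo C)

# Stand-in for the tokenizers dependency: a static archive. Without
# POSITION_INDEPENDENT_CODE its objects are built as non-PIC, and linking
# them into a shared library typically fails on Linux with "relocation
# R_X86_64_PC32 ... can not be used when making a shared object;
# recompile with -fPIC".
add_library(toy_tokenizer STATIC tokenizer.c)

# Equivalent in effect to the diff's set(CMAKE_POSITION_INDEPENDENT_CODE ON),
# but scoped to one target instead of every target defined after the set().
set_target_properties(toy_tokenizer PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Stand-in for libllama_runner.so: a shared library that links the static
# archive and therefore needs all of its objects to be relocatable.
add_library(toy_runner SHARED runner.c)
target_link_libraries(toy_runner PRIVATE toy_tokenizer)
```

The same scoping logic applies to `set(SUPPORT_REGEX_LOOKAHEAD ON)`: a plain `set()` in the parent directory is visible inside the `add_subdirectory` call, so the tokenizers subproject picks it up without callers passing `-DSUPPORT_REGEX_LOOKAHEAD=ON` themselves, which is why the README's runner invocation can drop that flag.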