diff --git a/.ci/scripts/build-qnn-sdk.sh b/.ci/scripts/build-qnn-sdk.sh index 260072f7342..ec3a8a39e37 100644 --- a/.ci/scripts/build-qnn-sdk.sh +++ b/.ci/scripts/build-qnn-sdk.sh @@ -6,6 +6,7 @@ # LICENSE file in the root directory of this source tree. set -eux +set -o xtrace build_qnn_backend() { echo "Start building qnn backend." diff --git a/backends/qualcomm/scripts/build.sh b/backends/qualcomm/scripts/build.sh index b63ea6fe8d9..61b363f1a77 100755 --- a/backends/qualcomm/scripts/build.sh +++ b/backends/qualcomm/scripts/build.sh @@ -4,6 +4,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. set -e +set -o xtrace if [[ -z ${QNN_SDK_ROOT} ]]; then echo "Please export QNN_SDK_ROOT=/path/to/qnn_sdk" diff --git a/build/build_android_llm_demo.sh b/build/build_android_llm_demo.sh index a11e54f932d..61f54c47cba 100644 --- a/build/build_android_llm_demo.sh +++ b/build/build_android_llm_demo.sh @@ -20,11 +20,6 @@ build_android_native_library() { TOKENIZER="$2" ANDROID_NDK="${ANDROID_NDK:-/opt/ndk}" CMAKE_OUT="cmake-out-android-${ANDROID_ABI}" - if [[ $TOKENIZER == "tiktoken" ]]; then - EXECUTORCH_USE_TIKTOKEN=ON - else - EXECUTORCH_USE_TIKTOKEN=OFF - fi cmake . 
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \ @@ -54,7 +49,6 @@ build_android_native_library() { -DANDROID_ABI="$ANDROID_ABI" \ -DANDROID_PLATFORM=android-23 \ -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ - -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON \ @@ -72,7 +66,6 @@ build_android_native_library() { -DEXECUTORCH_ENABLE_LOGGING=ON \ -DEXECUTORCH_LOG_LEVEL=Info \ -DEXECUTORCH_BUILD_LLAMA_JNI=ON \ - -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \ -DCMAKE_BUILD_TYPE=Release \ -B"${CMAKE_OUT}"/extension/android diff --git a/examples/demo-apps/android/LlamaDemo/README.md b/examples/demo-apps/android/LlamaDemo/README.md index 7bb36657da3..fc58d70a2f1 100644 --- a/examples/demo-apps/android/LlamaDemo/README.md +++ b/examples/demo-apps/android/LlamaDemo/README.md @@ -64,22 +64,14 @@ Note: `` is the root for the NDK, which is usually under `~/Library/Android/sdk/ndk/XX.Y.ZZZZZ` for macOS, and contains NOTICE and README.md. We use `/build/cmake/android.toolchain.cmake` for CMake to cross-compile. -3. (Optional) If you need to use tiktoken as the tokenizer (for LLaMA3), set -`EXECUTORCH_USE_TIKTOKEN=ON` and later CMake will use it as the tokenizer. -If you need to run other models like LLaMA2, skip this skip. - -```bash -export EXECUTORCH_USE_TIKTOKEN=ON # Only for LLaMA3 -``` - -4. Build the Android Java extension code: +3. Build the Android Java extension code: ```bash pushd extension/android ./gradlew build popd ``` -5. Run the following command set up the required JNI library: +4. 
Run the following command to set up the required JNI library: ```bash pushd examples/demo-apps/android/LlamaDemo ./gradlew :app:setup diff --git a/examples/demo-apps/android/LlamaDemo/setup.sh b/examples/demo-apps/android/LlamaDemo/setup.sh index 39a50f9b968..5b3244fbcce 100644 --- a/examples/demo-apps/android/LlamaDemo/setup.sh +++ b/examples/demo-apps/android/LlamaDemo/setup.sh @@ -35,7 +35,6 @@ cmake examples/models/llama2 \ -DANDROID_ABI="$ANDROID_ABI" \ -DANDROID_PLATFORM=android-23 \ -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ - -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ -DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ -DEXECUTORCH_BUILD_XNNPACK=ON \ @@ -50,7 +49,6 @@ cmake extension/android \ -DANDROID_PLATFORM=android-23 \ -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \ -DEXECUTORCH_BUILD_LLAMA_JNI=ON \ - -DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \ -DCMAKE_BUILD_TYPE=Release \ -B"${CMAKE_OUT}"/extension/android diff --git a/examples/models/llama2/CMakeLists.txt b/examples/models/llama2/CMakeLists.txt index 8b82fdda12f..b517641f408 100644 --- a/examples/models/llama2/CMakeLists.txt +++ b/examples/models/llama2/CMakeLists.txt @@ -21,8 +21,6 @@ project(llama_runner) # Duplicating options as root CMakeLists.txt option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF) -option(EXECUTORCH_USE_TIKTOKEN "Use Tiktoken as a tokenizer" OFF) - include(CMakeDependentOption) # # pthreadpool: build pthreadpool library.
Disable on unsupported platforms @@ -94,23 +92,6 @@ endif() # llama_runner library add_subdirectory(runner) -if(EXECUTORCH_USE_TIKTOKEN) - # find RE2 for tokenizer - set(ABSL_ENABLE_INSTALL ON) - set(ABSL_PROPAGATE_CXX_STD ON) - set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE}) - set(CMAKE_POSITION_INDEPENDENT_CODE ON) - add_subdirectory( - ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/abseil-cpp - ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp - ) - add_subdirectory( - ${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/re2 - ${CMAKE_CURRENT_BINARY_DIR}/re2 - ) - set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag}) - target_link_libraries(llama_runner PUBLIC re2::re2) -endif() set(link_libraries gflags) set(_srcs main.cpp) diff --git a/examples/models/llama2/README.md b/examples/models/llama2/README.md index b8a260865b5..ea95c7f965c 100644 --- a/examples/models/llama2/README.md +++ b/examples/models/llama2/README.md @@ -227,8 +227,6 @@ Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the cmake --build cmake-out/examples/models/llama2 -j16 --config Release ``` -For Llama3, add `-DEXECUTORCH_USE_TIKTOKEN=ON` option when building the llama runner. - 3. Run model. Run options available [here](https://github.com/pytorch/executorch/blob/main/examples/models/llama2/main.cpp#L18-L40). ``` cmake-out/examples/models/llama2/llama_main --model_path= --tokenizer_path= --prompt= @@ -283,7 +281,6 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \ cmake --build cmake-out-android/examples/models/llama2 -j16 --config Release ``` -For Llama3, add `-DEXECUTORCH_USE_TIKTOKEN=ON` option when building the llama runner. **2. 
Run on Android via adb shell** diff --git a/examples/models/llama2/runner/CMakeLists.txt b/examples/models/llama2/runner/CMakeLists.txt index c99a54982aa..abad63a3b5f 100644 --- a/examples/models/llama2/runner/CMakeLists.txt +++ b/examples/models/llama2/runner/CMakeLists.txt @@ -41,16 +41,13 @@ target_include_directories( extension_module INTERFACE ${_common_include_directories} ) -if(EXECUTORCH_USE_TIKTOKEN) - list( - APPEND _llama_runner__srcs - ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp - ) - list(APPEND _llama_runner__srcs - ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp - ) - set(_preprocessor_flag -DET_USE_TIKTOKEN) -endif() +list( + APPEND _llama_runner__srcs + ${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp +) +list(APPEND _llama_runner__srcs + ${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp +) if(CMAKE_TOOLCHAIN_IOS OR ANDROID @@ -63,7 +60,24 @@ else() add_library(llama_runner SHARED ${_llama_runner__srcs}) endif() -set(llama_runner_deps executorch extension_module extension_data_loader) +# find RE2 for tokenizer, build tiktoken +set(ABSL_ENABLE_INSTALL ON) +set(ABSL_PROPAGATE_CXX_STD ON) +set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE}) +set(CMAKE_POSITION_INDEPENDENT_CODE ON) +add_subdirectory( + ${EXECUTORCH_ROOT}/extension/llm/third-party/abseil-cpp + ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp +) +add_subdirectory( + ${EXECUTORCH_ROOT}/extension/llm/third-party/re2 + ${CMAKE_CURRENT_BINARY_DIR}/re2 +) +set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag}) + +set(llama_runner_deps executorch extension_module extension_data_loader + re2::re2 +) target_link_libraries(llama_runner PUBLIC ${llama_runner_deps}) diff --git a/examples/models/llama2/runner/runner.cpp b/examples/models/llama2/runner/runner.cpp index 7a2fa676628..8b9e6865516 100644 --- a/examples/models/llama2/runner/runner.cpp +++ b/examples/models/llama2/runner/runner.cpp @@ -16,11 +16,8 @@ #include #include -#if 
ET_USE_TIKTOKEN #include -#else /* BPE */ #include -#endif /* ET_USE_TIKTOKEN*/ namespace torch::executor { namespace { @@ -46,13 +43,6 @@ Runner::Runner( : temperature_(temperature), module_(std::make_unique(model_path, Module::LoadMode::File)), tokenizer_path_(tokenizer_path), - tokenizer_( -#if ET_USE_TIKTOKEN - get_tiktoken_for_llama() -#else - std::make_unique() -#endif - ), metadata_({ {kAppendEosToPrompt, false}, {kEnableDynamicShape, false}, @@ -79,8 +69,19 @@ Error Runner::load() { return Error::Ok; } ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward")); - - tokenizer_->load(tokenizer_path_); + // load tokenizer + tokenizer_ = nullptr; + tokenizer_ = std::make_unique(); + Error err = tokenizer_->load(tokenizer_path_); + if (err == Error::InvalidArgument) { + ET_LOG( + Info, + "Failed to load %s as a BPETokenizer artifact, trying Tiktoken", + tokenizer_path_.c_str()); + tokenizer_.reset(); + tokenizer_ = get_tiktoken_for_llama(); + tokenizer_->load(tokenizer_path_); + } ET_LOG(Info, "Reading metadata from model"); diff --git a/examples/models/llama2/runner/targets.bzl b/examples/models/llama2/runner/targets.bzl index 3ffc10421fc..475c5d92ab1 100644 --- a/examples/models/llama2/runner/targets.bzl +++ b/examples/models/llama2/runner/targets.bzl @@ -8,9 +8,6 @@ def _get_operator_lib(aten = False): else: return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/extension/llm/custom_ops:custom_ops"] -def use_tiktoken(): - return native.read_config("llama", "use_tiktoken", "0") == "1" - def define_common_targets(): for aten in (True, False): aten_suffix = "_aten" if aten else "" @@ -26,7 +23,6 @@ def define_common_targets(): preprocessor_flags = [ "-DUSE_ATEN_LIB", ] if aten else [], - exported_preprocessor_flags = ["-DET_USE_TIKTOKEN"] if use_tiktoken() else [], visibility = [ "@EXECUTORCH_CLIENTS", ], @@ -43,11 +39,9 @@ def define_common_targets(): "//executorch/kernels/quantized:generated_lib" + aten_suffix, 
"//executorch/runtime/core/exec_aten:lib" + aten_suffix, "//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix, - ] + ([ "//executorch/examples/models/llama2/tokenizer:tiktoken", - ] if use_tiktoken() else [ "//executorch/extension/llm/tokenizer:bpe_tokenizer", - ]) + (_get_operator_lib(aten)) + ([ + ] + (_get_operator_lib(aten)) + ([ # Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE) # Therefore enable it explicitly for now to avoid failing tests "//executorch/backends/vulkan:vulkan_backend_lib", diff --git a/examples/qualcomm/oss_scripts/llama2/CMakeLists.txt b/examples/qualcomm/oss_scripts/llama2/CMakeLists.txt index 7b59120d713..f02da300334 100644 --- a/examples/qualcomm/oss_scripts/llama2/CMakeLists.txt +++ b/examples/qualcomm/oss_scripts/llama2/CMakeLists.txt @@ -24,6 +24,6 @@ target_include_directories( ) target_link_libraries( qnn_llama_runner qnn_executorch_backend full_portable_ops_lib - extension_data_loader extension_module gflags + extension_data_loader extension_module gflags re2::re2 ) target_compile_options(qnn_llama_runner PUBLIC ${_common_compile_options}) diff --git a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt index 674aa2b72fe..2ca3364905c 100644 --- a/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt +++ b/examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt @@ -31,7 +31,7 @@ target_include_directories( ) target_link_libraries( qaihub_llama2_7b_runner qnn_executorch_backend executorch_no_prim_ops - extension_data_loader extension_module gflags + extension_data_loader extension_module gflags re2::re2 ) target_compile_options( qaihub_llama2_7b_runner PUBLIC ${_common_compile_options} @@ -71,7 +71,6 @@ list( _qaihub_llama3_8b_runner__srcs ${CMAKE_CURRENT_SOURCE_DIR}/../../../models/llama2/tokenizer/llama_tiktoken.cpp ) -set(_preprocessor_flag -DET_USE_TIKTOKEN) # build qaihub llama3 8b runner add_executable(qaihub_llama3_8b_runner 
${_qaihub_llama3_8b_runner__srcs}) diff --git a/examples/qualcomm/qaihub_scripts/stable_diffusion/CMakeLists.txt b/examples/qualcomm/qaihub_scripts/stable_diffusion/CMakeLists.txt index b0cec2d3005..affe666234a 100644 --- a/examples/qualcomm/qaihub_scripts/stable_diffusion/CMakeLists.txt +++ b/examples/qualcomm/qaihub_scripts/stable_diffusion/CMakeLists.txt @@ -20,7 +20,7 @@ target_include_directories( ) target_link_libraries( qaihub_stable_diffusion_runner qnn_executorch_backend executorch_no_prim_ops - extension_data_loader extension_module gflags + extension_data_loader extension_module gflags re2::re2 ) target_compile_options( qaihub_stable_diffusion_runner PUBLIC ${_common_compile_options} diff --git a/extension/android/CMakeLists.txt b/extension/android/CMakeLists.txt index 5982cd16e10..4c2abeb4f6e 100644 --- a/extension/android/CMakeLists.txt +++ b/extension/android/CMakeLists.txt @@ -129,19 +129,18 @@ if(EXECUTORCH_BUILD_LLAMA_JNI) quantized_ops_lib ) target_compile_options(executorch_llama_jni PUBLIC ${_common_compile_options}) - if(EXECUTORCH_USE_TIKTOKEN) - set(ABSL_ENABLE_INSTALL ON) - set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE}) - set(CMAKE_POSITION_INDEPENDENT_CODE ON) - add_subdirectory( - ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/abseil-cpp - ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp - ) - add_subdirectory( - ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/re2 - ${CMAKE_CURRENT_BINARY_DIR}/re2 - ) - set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag}) - target_link_libraries(executorch_llama_jni re2::re2) - endif() + # link re2 + set(ABSL_ENABLE_INSTALL ON) + set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE}) + set(CMAKE_POSITION_INDEPENDENT_CODE ON) + add_subdirectory( + ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/abseil-cpp + ${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp + ) + add_subdirectory( + ${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/re2 + ${CMAKE_CURRENT_BINARY_DIR}/re2 + ) + 
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag}) + target_link_libraries(executorch_llama_jni re2::re2) endif() diff --git a/extension/llm/third-party/TARGETS b/extension/llm/third-party/TARGETS new file mode 100644 index 00000000000..978c12371fe --- /dev/null +++ b/extension/llm/third-party/TARGETS @@ -0,0 +1,47 @@ +load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime") + +oncall("executorch") + +runtime.cxx_library( + name = "abseil", + public_include_directories = ["abseil-cpp"], + srcs = glob( + ["abseil-cpp/absl/**/*.cc"], + exclude = [ + "abseil-cpp/absl/**/*test*.cc", + "abseil-cpp/absl/**/*mock*.cc", + "abseil-cpp/absl/**/*matchers*.cc", + "abseil-cpp/absl/**/*benchmark*.cc", + ], + ), + exported_linker_flags = select( + { + "DEFAULT": [], + "ovr_config//os:macos": ["-Wl,-framework,CoreFoundation"], + }, + ), + visibility = ["PUBLIC"], + _is_external_target = True, +) + +runtime.cxx_library( + name = "re2", + public_include_directories = ["re2"], + srcs = glob( + [ + "re2/re2/**/*.cc", + "re2/util/**/*.cc", + ], + exclude = [ + "re2/re2/**/*test*.cc", + "re2/re2/testing/*.cc", + "re2/re2/fuzzing/*.cc", + "re2/re2/**/*benchmark*.cc", + ], + ), + exported_deps = [ + ":abseil", + ], + visibility = ["PUBLIC"], + _is_external_target = True, +) diff --git a/shim/xplat/executorch/build/env_interface.bzl b/shim/xplat/executorch/build/env_interface.bzl index 27d2887b668..5b0acd36dab 100644 --- a/shim/xplat/executorch/build/env_interface.bzl +++ b/shim/xplat/executorch/build/env_interface.bzl @@ -41,7 +41,7 @@ _EXTERNAL_DEPS = { "libtorch_python": "//third-party:libtorch_python", "prettytable": "//third-party:prettytable", "pybind11": "//third-party:pybind11", - "re2": [], # TODO(larryliu0820): Add support + "re2": "//extension/llm/third-party:re2", "sentencepiece-py": [], # Core C++ PyTorch functionality like Tensor and ScalarType. "torch-core-cpp": "//third-party:libtorch",