Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .ci/scripts/build-qnn-sdk.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
# LICENSE file in the root directory of this source tree.

set -eux
set -o xtrace

build_qnn_backend() {
echo "Start building qnn backend."
Expand Down
1 change: 1 addition & 0 deletions backends/qualcomm/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
set -e
set -o xtrace

if [[ -z ${QNN_SDK_ROOT} ]]; then
echo "Please export QNN_SDK_ROOT=/path/to/qnn_sdk"
Expand Down
7 changes: 0 additions & 7 deletions build/build_android_llm_demo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ build_android_native_library() {
TOKENIZER="$2"
ANDROID_NDK="${ANDROID_NDK:-/opt/ndk}"
CMAKE_OUT="cmake-out-android-${ANDROID_ABI}"
if [[ $TOKENIZER == "tiktoken" ]]; then
EXECUTORCH_USE_TIKTOKEN=ON
else
EXECUTORCH_USE_TIKTOKEN=OFF
fi

cmake . -DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
Expand Down Expand Up @@ -54,7 +49,6 @@ build_android_native_library() {
-DANDROID_ABI="$ANDROID_ABI" \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
Expand All @@ -72,7 +66,6 @@ build_android_native_library() {
-DEXECUTORCH_ENABLE_LOGGING=ON \
-DEXECUTORCH_LOG_LEVEL=Info \
-DEXECUTORCH_BUILD_LLAMA_JNI=ON \
-DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${CMAKE_OUT}"/extension/android

Expand Down
12 changes: 2 additions & 10 deletions examples/demo-apps/android/LlamaDemo/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,22 +64,14 @@ Note: `<path_to_android_ndk>` is the root for the NDK, which is usually under
`~/Library/Android/sdk/ndk/XX.Y.ZZZZZ` for macOS, and contains NOTICE and README.md.
We use `<path_to_android_ndk>/build/cmake/android.toolchain.cmake` for CMake to cross-compile.

3. (Optional) If you need to use tiktoken as the tokenizer (for LLaMA3), set
`EXECUTORCH_USE_TIKTOKEN=ON` and later CMake will use it as the tokenizer.
If you need to run other models like LLaMA2, skip this step.

```bash
export EXECUTORCH_USE_TIKTOKEN=ON # Only for LLaMA3
```

4. Build the Android Java extension code:
3. Build the Android Java extension code:
```bash
pushd extension/android
./gradlew build
popd
```

5. Run the following command to set up the required JNI library:
4. Run the following command to set up the required JNI library:
```bash
pushd examples/demo-apps/android/LlamaDemo
./gradlew :app:setup
Expand Down
2 changes: 0 additions & 2 deletions examples/demo-apps/android/LlamaDemo/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ cmake examples/models/llama2 \
-DANDROID_ABI="$ANDROID_ABI" \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \
-DEXECUTORCH_BUILD_XNNPACK=ON \
Expand All @@ -50,7 +49,6 @@ cmake extension/android \
-DANDROID_PLATFORM=android-23 \
-DCMAKE_INSTALL_PREFIX="${CMAKE_OUT}" \
-DEXECUTORCH_BUILD_LLAMA_JNI=ON \
-DEXECUTORCH_USE_TIKTOKEN="${EXECUTORCH_USE_TIKTOKEN}" \
-DCMAKE_BUILD_TYPE=Release \
-B"${CMAKE_OUT}"/extension/android

Expand Down
19 changes: 0 additions & 19 deletions examples/models/llama2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ project(llama_runner)
# Duplicating options as root CMakeLists.txt
option(EXECUTORCH_BUILD_KERNELS_OPTIMIZED "Build the optimized kernels" OFF)

option(EXECUTORCH_USE_TIKTOKEN "Use Tiktoken as a tokenizer" OFF)

include(CMakeDependentOption)
#
# pthreadpool: build pthreadpool library. Disable on unsupported platforms
Expand Down Expand Up @@ -94,23 +92,6 @@ endif()

# llama_runner library
add_subdirectory(runner)
if(EXECUTORCH_USE_TIKTOKEN)
# find RE2 for tokenizer
set(ABSL_ENABLE_INSTALL ON)
set(ABSL_PROPAGATE_CXX_STD ON)
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/abseil-cpp
${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../../extension/llm/third-party/re2
${CMAKE_CURRENT_BINARY_DIR}/re2
)
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
target_link_libraries(llama_runner PUBLIC re2::re2)
endif()

set(link_libraries gflags)
set(_srcs main.cpp)
Expand Down
3 changes: 0 additions & 3 deletions examples/models/llama2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,6 @@ Note for Mac users: There's a known linking issue with Xcode 15.1. Refer to the
cmake --build cmake-out/examples/models/llama2 -j16 --config Release
```

For Llama3, add `-DEXECUTORCH_USE_TIKTOKEN=ON` option when building the llama runner.

3. Run model. Run options available [here](https://github.com/pytorch/executorch/blob/main/examples/models/llama2/main.cpp#L18-L40).
```
cmake-out/examples/models/llama2/llama_main --model_path=<model pte file> --tokenizer_path=<tokenizer.bin> --prompt=<prompt>
Expand Down Expand Up @@ -283,7 +281,6 @@ cmake -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \

cmake --build cmake-out-android/examples/models/llama2 -j16 --config Release
```
For Llama3, add `-DEXECUTORCH_USE_TIKTOKEN=ON` option when building the llama runner.

**2. Run on Android via adb shell**

Expand Down
36 changes: 25 additions & 11 deletions examples/models/llama2/runner/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,13 @@ target_include_directories(
extension_module INTERFACE ${_common_include_directories}
)

if(EXECUTORCH_USE_TIKTOKEN)
list(
APPEND _llama_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
)
list(APPEND _llama_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
)
set(_preprocessor_flag -DET_USE_TIKTOKEN)
endif()
list(
APPEND _llama_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../../extension/llm/tokenizer/tiktoken.cpp
)
list(APPEND _llama_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../tokenizer/llama_tiktoken.cpp
)

if(CMAKE_TOOLCHAIN_IOS
OR ANDROID
Expand All @@ -63,7 +60,24 @@ else()
add_library(llama_runner SHARED ${_llama_runner__srcs})
endif()

set(llama_runner_deps executorch extension_module extension_data_loader)
# find RE2 for tokenizer, build tiktoken
set(ABSL_ENABLE_INSTALL ON)
set(ABSL_PROPAGATE_CXX_STD ON)
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${EXECUTORCH_ROOT}/extension/llm/third-party/abseil-cpp
${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
)
add_subdirectory(
${EXECUTORCH_ROOT}/extension/llm/third-party/re2
${CMAKE_CURRENT_BINARY_DIR}/re2
)
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})

set(llama_runner_deps executorch extension_module extension_data_loader
re2::re2
)

target_link_libraries(llama_runner PUBLIC ${llama_runner_deps})

Expand Down
25 changes: 13 additions & 12 deletions examples/models/llama2/runner/runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,8 @@
#include <executorch/extension/llm/runner/util.h>
#include <executorch/extension/runner_util/managed_tensor.h>

#if ET_USE_TIKTOKEN
#include <executorch/examples/models/llama2/tokenizer/llama_tiktoken.h>
#else /* BPE */
#include <executorch/extension/llm/tokenizer/bpe_tokenizer.h>
#endif /* ET_USE_TIKTOKEN*/

namespace torch::executor {
namespace {
Expand All @@ -46,13 +43,6 @@ Runner::Runner(
: temperature_(temperature),
module_(std::make_unique<Module>(model_path, Module::LoadMode::File)),
tokenizer_path_(tokenizer_path),
tokenizer_(
#if ET_USE_TIKTOKEN
get_tiktoken_for_llama()
#else
std::make_unique<BPETokenizer>()
#endif
),
metadata_({
{kAppendEosToPrompt, false},
{kEnableDynamicShape, false},
Expand All @@ -79,8 +69,19 @@ Error Runner::load() {
return Error::Ok;
}
ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method("forward"));

tokenizer_->load(tokenizer_path_);
// load tokenizer
tokenizer_ = nullptr;
tokenizer_ = std::make_unique<BPETokenizer>();
Error err = tokenizer_->load(tokenizer_path_);
if (err == Error::InvalidArgument) {
ET_LOG(
Info,
"Failed to load %s as a BPETokenizer artifact, trying Tiktoken",
tokenizer_path_.c_str());
tokenizer_.reset();
tokenizer_ = get_tiktoken_for_llama();
tokenizer_->load(tokenizer_path_);
}

ET_LOG(Info, "Reading metadata from model");

Expand Down
8 changes: 1 addition & 7 deletions examples/models/llama2/runner/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@ def _get_operator_lib(aten = False):
else:
return ["//executorch/configurations:optimized_native_cpu_ops", "//executorch/extension/llm/custom_ops:custom_ops"]

def use_tiktoken():
return native.read_config("llama", "use_tiktoken", "0") == "1"

def define_common_targets():
for aten in (True, False):
aten_suffix = "_aten" if aten else ""
Expand All @@ -26,7 +23,6 @@ def define_common_targets():
preprocessor_flags = [
"-DUSE_ATEN_LIB",
] if aten else [],
exported_preprocessor_flags = ["-DET_USE_TIKTOKEN"] if use_tiktoken() else [],
visibility = [
"@EXECUTORCH_CLIENTS",
],
Expand All @@ -43,11 +39,9 @@ def define_common_targets():
"//executorch/kernels/quantized:generated_lib" + aten_suffix,
"//executorch/runtime/core/exec_aten:lib" + aten_suffix,
"//executorch/runtime/core/exec_aten/util:tensor_util" + aten_suffix,
] + ([
"//executorch/examples/models/llama2/tokenizer:tiktoken",
] if use_tiktoken() else [
"//executorch/extension/llm/tokenizer:bpe_tokenizer",
]) + (_get_operator_lib(aten)) + ([
] + (_get_operator_lib(aten)) + ([
# Vulkan API currently cannot build on some platforms (e.g. Apple, FBCODE)
# Therefore enable it explicitly for now to avoid failing tests
"//executorch/backends/vulkan:vulkan_backend_lib",
Expand Down
2 changes: 1 addition & 1 deletion examples/qualcomm/oss_scripts/llama2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ target_include_directories(
)
target_link_libraries(
qnn_llama_runner qnn_executorch_backend full_portable_ops_lib
extension_data_loader extension_module gflags
extension_data_loader extension_module gflags re2::re2
)
target_compile_options(qnn_llama_runner PUBLIC ${_common_compile_options})
3 changes: 1 addition & 2 deletions examples/qualcomm/qaihub_scripts/llama/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ target_include_directories(
)
target_link_libraries(
qaihub_llama2_7b_runner qnn_executorch_backend executorch_no_prim_ops
extension_data_loader extension_module gflags
extension_data_loader extension_module gflags re2::re2
)
target_compile_options(
qaihub_llama2_7b_runner PUBLIC ${_common_compile_options}
Expand Down Expand Up @@ -71,7 +71,6 @@ list(
_qaihub_llama3_8b_runner__srcs
${CMAKE_CURRENT_SOURCE_DIR}/../../../models/llama2/tokenizer/llama_tiktoken.cpp
)
set(_preprocessor_flag -DET_USE_TIKTOKEN)

# build qaihub llama3 8b runner
add_executable(qaihub_llama3_8b_runner ${_qaihub_llama3_8b_runner__srcs})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ target_include_directories(
)
target_link_libraries(
qaihub_stable_diffusion_runner qnn_executorch_backend executorch_no_prim_ops
extension_data_loader extension_module gflags
extension_data_loader extension_module gflags re2::re2
)
target_compile_options(
qaihub_stable_diffusion_runner PUBLIC ${_common_compile_options}
Expand Down
29 changes: 14 additions & 15 deletions extension/android/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,18 @@ if(EXECUTORCH_BUILD_LLAMA_JNI)
quantized_ops_lib
)
target_compile_options(executorch_llama_jni PUBLIC ${_common_compile_options})
if(EXECUTORCH_USE_TIKTOKEN)
set(ABSL_ENABLE_INSTALL ON)
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/abseil-cpp
${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/re2
${CMAKE_CURRENT_BINARY_DIR}/re2
)
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
target_link_libraries(executorch_llama_jni re2::re2)
endif()
# link re2
set(ABSL_ENABLE_INSTALL ON)
set(_pic_flag ${CMAKE_POSITION_INDEPENDENT_CODE})
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/abseil-cpp
${CMAKE_CURRENT_BINARY_DIR}/abseil-cpp
)
add_subdirectory(
${CMAKE_CURRENT_SOURCE_DIR}/../../extension/llm/third-party/re2
${CMAKE_CURRENT_BINARY_DIR}/re2
)
set(CMAKE_POSITION_INDEPENDENT_CODE ${_pic_flag})
target_link_libraries(executorch_llama_jni re2::re2)
endif()
47 changes: 47 additions & 0 deletions extension/llm/third-party/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
load("@fbsource//xplat/executorch/build:runtime_wrapper.bzl", "runtime")

oncall("executorch")

runtime.cxx_library(
name = "abseil",
public_include_directories = ["abseil-cpp"],
srcs = glob(
["abseil-cpp/absl/**/*.cc"],
exclude = [
"abseil-cpp/absl/**/*test*.cc",
"abseil-cpp/absl/**/*mock*.cc",
"abseil-cpp/absl/**/*matchers*.cc",
"abseil-cpp/absl/**/*benchmark*.cc",
],
),
exported_linker_flags = select(
{
"DEFAULT": [],
"ovr_config//os:macos": ["-Wl,-framework,CoreFoundation"],
},
),
visibility = ["PUBLIC"],
_is_external_target = True,
)

runtime.cxx_library(
name = "re2",
public_include_directories = ["re2"],
srcs = glob(
[
"re2/re2/**/*.cc",
"re2/util/**/*.cc",
],
exclude = [
"re2/re2/**/*test*.cc",
"re2/re2/testing/*.cc",
"re2/re2/fuzzing/*.cc",
"re2/re2/**/*benchmark*.cc",
],
),
exported_deps = [
":abseil",
],
visibility = ["PUBLIC"],
_is_external_target = True,
)
2 changes: 1 addition & 1 deletion shim/xplat/executorch/build/env_interface.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ _EXTERNAL_DEPS = {
"libtorch_python": "//third-party:libtorch_python",
"prettytable": "//third-party:prettytable",
"pybind11": "//third-party:pybind11",
"re2": [], # TODO(larryliu0820): Add support
"re2": "//extension/llm/third-party:re2",
"sentencepiece-py": [],
# Core C++ PyTorch functionality like Tensor and ScalarType.
"torch-core-cpp": "//third-party:libtorch",
Expand Down
Loading