From 7d4686af79ab694b281d84c270d7a4a32247b606 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:24:25 -0700 Subject: [PATCH 01/39] add new torchao experimental kernels to torchchat --- .gitignore | 1 + install/.pins/torchao-experimental-pin.txt | 1 + runner/aoti.cmake | 4 ++ runner/et.cmake | 4 ++ torchchat/utils/quantize.py | 47 +++++++++++++++++-- torchchat/utils/scripts/build_native.sh | 17 ++++++- .../scripts/build_torchao_experimental.sh | 16 +++++++ torchchat/utils/scripts/install_utils.sh | 45 ++++++++++++++++++ 8 files changed, 129 insertions(+), 6 deletions(-) create mode 100644 install/.pins/torchao-experimental-pin.txt create mode 100644 torchchat/utils/scripts/build_torchao_experimental.sh diff --git a/.gitignore b/.gitignore index 3f25b76c0..ee856fcd2 100644 --- a/.gitignore +++ b/.gitignore @@ -14,6 +14,7 @@ __pycache__/ # Build directories build/android/* et-build/* +torchao-build/* runner-et/cmake-out/* runner-aoti/cmake-out/* cmake-out/ diff --git a/install/.pins/torchao-experimental-pin.txt b/install/.pins/torchao-experimental-pin.txt new file mode 100644 index 000000000..9b101777d --- /dev/null +++ b/install/.pins/torchao-experimental-pin.txt @@ -0,0 +1 @@ +3fa38aaf1276e36845a82fb399e5054718a441c4 diff --git a/runner/aoti.cmake b/runner/aoti.cmake index 156e9bcce..5449f2156 100644 --- a/runner/aoti.cmake +++ b/runner/aoti.cmake @@ -28,3 +28,7 @@ if(Torch_FOUND) target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m) set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17) endif() + +if (LINK_TORCHAO_CUSTOM_OPS) + target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_aten${CMAKE_SHARED_LIBRARY_SUFFIX}") +endif() diff --git a/runner/et.cmake b/runner/et.cmake index 7fc16b1f2..b6eee8a2f 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -111,6 +111,10 @@ if(executorch_FOUND) target_link_libraries(et_run PRIVATE log) endif() + if(LINK_TORCHAO_CUSTOM_OPS) 
+ target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}") + endif() + # Adding target_link_options_shared_lib as commented out below leads to this: # # CMake Error at Utils.cmake:22 (target_link_options): diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index a0d9248a9..df8a39b04 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py @@ -96,10 +96,19 @@ def quantize_model( precision = get_precision() try: - # Easier to ask forgiveness than permission - quant_handler = ao_quantizer_class_dict[quantizer]( - groupsize=q_kwargs["groupsize"], device=device, precision=precision - ) + if quantizer == "linear:a8wxdq": + quant_handler = ao_quantizer_class_dict[quantizer]( + device=device, + precision=precision, + bitwidth=q_kwargs.get("bitwidth", 4), + groupsize=q_kwargs.get("groupsize", 128), + has_weight_zeros=q_kwargs.get("has_weight_zeros", False), + ) + else: + # Easier to ask forgiveness than permission + quant_handler = ao_quantizer_class_dict[quantizer]( + groupsize=q_kwargs["groupsize"], device=device, precision=precision + ) except TypeError as e: if "unexpected keyword argument 'device'" in str(e): quant_handler = ao_quantizer_class_dict[quantizer]( @@ -861,3 +870,33 @@ def quantized_model(self) -> nn.Module: "linear:int4": Int4WeightOnlyQuantizer, "linear:a8w4dq": Int8DynActInt4WeightQuantizer, } + +try: + import importlib.util + import sys + import os + torchao_build_path = f"{os.getcwd()}/torchao-build" + + # Try loading quantizer + torchao_experimental_quant_api_spec = importlib.util.spec_from_file_location( + "torchao_experimental_quant_api", + f"{torchao_build_path}/src/ao/torchao/experimental/quant_api.py", + ) + torchao_experimental_quant_api = importlib.util.module_from_spec(torchao_experimental_quant_api_spec) + sys.modules["torchao_experimental_quant_api"] = torchao_experimental_quant_api + 
torchao_experimental_quant_api_spec.loader.exec_module(torchao_experimental_quant_api) + from torchao_experimental_quant_api import Int8DynActIntxWeightQuantizer + ao_quantizer_class_dict["linear:a8wxdq"] = Int8DynActIntxWeightQuantizer + + # Try loading custom op + try: + import glob + libs = glob.glob(f"{torchao_build_path}/cmake-out/liblowbit_op_aten.*") + libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) + torch.ops.load_library(libs[0]) + except Exception as e: + print("Failed to torchao custom op library with error: ", e) + print("Slow fallback kernels will be used.") + +except Exception as e: + print(f"Failed to load torchao experimental a8wxdq quantizer with error: {e}") diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index aacd97415..b75342dc2 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -25,6 +25,8 @@ if [ $# -eq 0 ]; then show_help exit 1 fi + +LINK_TORCHAO=OFF while (( "$#" )); do case "$1" in -h|--help) @@ -41,6 +43,11 @@ while (( "$#" )); do TARGET="et" shift ;; + link_torchao) + echo "Linking with torchao custom ops..." + LINK_TORCHAO=ON + shift + ;; *) echo "Invalid option: $1" show_help @@ -72,14 +79,20 @@ if [[ "$TARGET" == "et" ]]; then install_pip_dependencies clone_executorch install_executorch_libs false + + if [[ "$LINK_TORCHAO" == "ON" ]]; then + EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" + EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a" + install_torchao_custom_executorch_ops + fi fi popd # CMake commands if [[ "$TARGET" == "et" ]]; then - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja + cmake -S . 
-B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja else - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja + cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja fi cmake --build ./cmake-out --target "${TARGET}"_run diff --git a/torchchat/utils/scripts/build_torchao_experimental.sh b/torchchat/utils/scripts/build_torchao_experimental.sh new file mode 100644 index 000000000..1df3e80c6 --- /dev/null +++ b/torchchat/utils/scripts/build_torchao_experimental.sh @@ -0,0 +1,16 @@ +#!/bin/bash +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + + + +source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" + +pushd ${TORCHCHAT_ROOT} +find_cmake_prefix_path +clone_torchao +install_torchao_custom_aten_ops +popd diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index f915402e7..c63234c0f 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -162,3 +162,48 @@ install_executorch_libs() { install_executorch_python_libs $1 } + +clone_torchao() { + echo "Cloning torchao to ${TORCHCHAT_ROOT}/torchao-build/src" + rm -rf ${TORCHCHAT_ROOT}/torchao-build/src + mkdir -p ${TORCHCHAT_ROOT}/torchao-build/src + pushd ${TORCHCHAT_ROOT}/torchao-build/src + echo $pwd + + cp -R /Users/scroy/fbsource/fbcode/pytorch/ao . 
+ # git clone https://github.com/pytorch/ao.git + # cd ao + # git checkout $(cat ${TORCHCHAT_ROOT}/intstall/.pins/torchao-experimental-pin.txt) + + popd +} + +install_torchao_custom_aten_ops() { + echo "Building torchao custom ops for ATen" + pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op + export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao + + CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out + cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \ + -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ + -DPLATFORM="ATEN" \ + -S . \ + -B ${CMAKE_OUT_DIR} -G Ninja + cmake --build ${CMAKE_OUT_DIR} +} + +install_torchao_custom_executorch_ops() { + echo "Building torchao custom ops for ExecuTorch" + pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op + export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao + + CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out" + cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \ + -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ + -DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \ + -DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \ + -DPLATFORM="EXECUTORCH" \ + -S . 
\ + -B ${CMAKE_OUT_DIR} -G Ninja + cmake --build ${CMAKE_OUT_DIR} +} From 8ae346eee8b6b478d8ea05d823a14d491e98ef5b Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:53:36 -0700 Subject: [PATCH 02/39] add doc --- docs/quantization.md | 62 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/docs/quantization.md b/docs/quantization.md index 1f619e58e..6245e8b6d 100644 --- a/docs/quantization.md +++ b/docs/quantization.md @@ -118,6 +118,68 @@ python3 torchchat.py export llama3 --quantize '{"embedding": {"bitwidth": 4, "gr python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my name is" ``` +## Experimental TorchAO lowbit kernels + +### Use +The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize. +It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false). +The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true). +Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q40 quantization scheme. + +You should expect high performance on ARM CPU if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization. + +### Setup +To use a8wxdq, you must set up the torchao experimental kernels. These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon. + +From the torchchat root directory, run +``` +sh torchchat/utils/scripts/build_torchao_experimental.sh +``` + +This should take about 10 seconds to complete. Once finished, you can use a8wxdq in torchchat. 
+ +Note: if you want to use the new kernels in the AOTI and C++ runners, you must pass the flag link_torchao when running the scripts the build the runners. + +``` +sh torchchat/utils/scripts/build_native.sh aoti link_torchao +``` + +``` +sh torchchat/utils/scripts/build_native.sh et link_torchao +``` + +### Examples + +#### Eager mode +``` +python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' +``` + +#### torch.compile +``` +python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile +``` + +As with PyTorch in general, you can experiment with performance on a difference number of threads by defining OMP_NUM_THREADS. For example, + +``` +OMP_NUM_THREADS=6 python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile +``` + +#### AOTI +``` +python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-dso llama3.so +python3 torchchat.py generate llama3 --dso-path llama3_1.so --prompt "Hello my name is" +``` + +#### ExecuTorch +``` +python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-pte llama3.pte +``` + +Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file. +Also note that the ExecuTorch op that wraps the new torchao kernel is currently single threaded. 
+ ## Quantization Profiles Four [sample profiles](https://github.com/pytorch/torchchat/tree/main/torchchat/quant_config/) are included with the torchchat distribution: `cuda.json`, `desktop.json`, `mobile.json`, `pi5.json` From f9bea2b0445fbd2fbd23ad2cca184c9380132144 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Sat, 14 Sep 2024 20:44:05 -0700 Subject: [PATCH 03/39] update torchao library name --- runner/aoti.cmake | 2 +- runner/et.cmake | 2 +- torchchat/utils/quantize.py | 2 +- torchchat/utils/scripts/install_utils.sh | 19 ++++++++++--------- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/runner/aoti.cmake b/runner/aoti.cmake index 5449f2156..ef7275ede 100644 --- a/runner/aoti.cmake +++ b/runner/aoti.cmake @@ -30,5 +30,5 @@ if(Torch_FOUND) endif() if (LINK_TORCHAO_CUSTOM_OPS) - target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_aten${CMAKE_SHARED_LIBRARY_SUFFIX}") + target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() diff --git a/runner/et.cmake b/runner/et.cmake index b6eee8a2f..5b5d2629d 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -112,7 +112,7 @@ if(executorch_FOUND) endif() if(LINK_TORCHAO_CUSTOM_OPS) - target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}") + target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() # Adding target_link_options_shared_lib as commented out below leads to this: diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index df8a39b04..8cfe536e2 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py @@ -891,7 +891,7 @@ def quantized_model(self) -> nn.Module: # Try loading custom op try: 
import glob - libs = glob.glob(f"{torchao_build_path}/cmake-out/liblowbit_op_aten.*") + libs = glob.glob(f"{torchao_build_path}/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten.*") libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) torch.ops.load_library(libs[0]) except Exception as e: diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index c63234c0f..37a2f6a71 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -180,30 +180,31 @@ clone_torchao() { install_torchao_custom_aten_ops() { echo "Building torchao custom ops for ATen" - pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op - export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao + pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out - cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \ - -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ - -DPLATFORM="ATEN" \ + cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ + -DTORCHAO_OP_TARGET="ATEN" \ -S . \ -B ${CMAKE_OUT_DIR} -G Ninja cmake --build ${CMAKE_OUT_DIR} + + popd } install_torchao_custom_executorch_ops() { echo "Building torchao custom ops for ExecuTorch" - pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op + pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out" - cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \ - -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ + cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ -DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \ -DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \ - -DPLATFORM="EXECUTORCH" \ + -DTORCHAO_OP_TARGET="EXECUTORCH" \ -S . 
\ -B ${CMAKE_OUT_DIR} -G Ninja cmake --build ${CMAKE_OUT_DIR} + + popd } From c15f06b21f766e9146fcbe080f06140ded3e66cd Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Sat, 14 Sep 2024 21:02:43 -0700 Subject: [PATCH 04/39] typo --- runner/et.cmake | 2 +- torchchat/utils/scripts/install_utils.sh | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/runner/et.cmake b/runner/et.cmake index 5b5d2629d..921dce93c 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -112,7 +112,7 @@ if(executorch_FOUND) endif() if(LINK_TORCHAO_CUSTOM_OPS) - target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten${CMAKE_SHARED_LIBRARY_SUFFIX}") + target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() # Adding target_link_options_shared_lib as commented out below leads to this: diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 37a2f6a71..3abe93ceb 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -195,12 +195,11 @@ install_torchao_custom_aten_ops() { install_torchao_custom_executorch_ops() { echo "Building torchao custom ops for ExecuTorch" pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental - export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out" cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ - -DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \ - -DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \ + -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \ + -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \ -DTORCHAO_OP_TARGET="EXECUTORCH" \ -S . 
\ -B ${CMAKE_OUT_DIR} -G Ninja From f7f8bf8752f9552d602d738f1ed99e02a64cb9f9 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 16 Sep 2024 09:29:30 -0700 Subject: [PATCH 05/39] add multithreading to ET runner --- torchchat/utils/scripts/build_native.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index b75342dc2..eecd07884 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -77,7 +77,7 @@ git submodule sync if [[ "$TARGET" == "et" ]]; then find_cmake_prefix_path install_pip_dependencies - clone_executorch + # clone_executorch install_executorch_libs false if [[ "$LINK_TORCHAO" == "ON" ]]; then @@ -90,7 +90,7 @@ popd # CMake commands if [[ "$TARGET" == "et" ]]; then - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja + cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja else cmake -S . 
-B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja fi From c677aa5e7677b7590c7e09dea299822d3fcf8a46 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:36:28 -0700 Subject: [PATCH 06/39] update lib names --- runner/aoti.cmake | 2 +- runner/et.cmake | 2 +- torchchat/utils/quantize.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/runner/aoti.cmake b/runner/aoti.cmake index ef7275ede..35e4c1329 100644 --- a/runner/aoti.cmake +++ b/runner/aoti.cmake @@ -30,5 +30,5 @@ if(Torch_FOUND) endif() if (LINK_TORCHAO_CUSTOM_OPS) - target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten${CMAKE_SHARED_LIBRARY_SUFFIX}") + target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() diff --git a/runner/et.cmake b/runner/et.cmake index 921dce93c..108102b86 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -112,7 +112,7 @@ if(executorch_FOUND) endif() if(LINK_TORCHAO_CUSTOM_OPS) - target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}") + target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}") endif() # Adding target_link_options_shared_lib as commented out below leads to this: diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index 8cfe536e2..041f074c2 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py @@ -891,7 +891,7 @@ def quantized_model(self) -> nn.Module: # Try loading custom op try: import glob - libs = 
glob.glob(f"{torchao_build_path}/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten.*") + libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*") libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) torch.ops.load_library(libs[0]) except Exception as e: From 53c24c317e3bea260cbf32c1496637e699ca0a59 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:50:22 -0700 Subject: [PATCH 07/39] update lib name --- torchchat/utils/scripts/install_utils.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 3abe93ceb..0102d7707 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -184,10 +184,11 @@ install_torchao_custom_aten_ops() { CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ -DTORCHAO_OP_TARGET="ATEN" \ -S . \ -B ${CMAKE_OUT_DIR} -G Ninja - cmake --build ${CMAKE_OUT_DIR} + cmake --build ${CMAKE_OUT_DIR} --target install --config Release popd } @@ -198,12 +199,13 @@ install_torchao_custom_executorch_ops() { CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out" cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ + -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \ -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \ -DTORCHAO_OP_TARGET="EXECUTORCH" \ -S . 
\ -B ${CMAKE_OUT_DIR} -G Ninja - cmake --build ${CMAKE_OUT_DIR} + cmake --build ${CMAKE_OUT_DIR} --target install --config Release popd } From 504375e06e352136c7549d66a97189afd5d23610 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:47:07 -0700 Subject: [PATCH 08/39] update torchao ExecuTorch op lib to static --- install/.pins/et-pin.txt | 2 +- runner/et.cmake | 9 +++++++-- runner/run.cpp | 17 +++++++++-------- torchchat/export.py | 3 +-- torchchat/utils/scripts/build_native.sh | 6 +++--- torchchat/utils/scripts/install_utils.sh | 4 +++- 6 files changed, 24 insertions(+), 17 deletions(-) diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt index a6f1373dd..0a15fd2b5 100644 --- a/install/.pins/et-pin.txt +++ b/install/.pins/et-pin.txt @@ -1 +1 @@ -91298923a0076c1b41059efb6dad2876426e4b03 +58700faa262ddf45b223353c120ffaf6b2003711 diff --git a/runner/et.cmake b/runner/et.cmake index 108102b86..7715ce656 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -62,7 +62,6 @@ if(executorch_FOUND) set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch) set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack) - list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp) list(APPEND _common_include_directories ${XNNPACK_ROOT}/third-party/cpuinfo/include) @@ -80,7 +79,9 @@ if(executorch_FOUND) et_run PRIVATE executorch extension_module + extension_tensor extension_data_loader + extension_threadpool optimized_kernels quantized_kernels portable_kernels @@ -112,7 +113,11 @@ if(executorch_FOUND) endif() if(LINK_TORCHAO_CUSTOM_OPS) - target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}") + target_link_libraries(et_run PRIVATE "$") + target_link_libraries(et_run PRIVATE + "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a" + 
"${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a" + ) endif() # Adding target_link_options_shared_lib as commented out below leads to this: diff --git a/runner/run.cpp b/runner/run.cpp index 999ad2fcc..455f9e076 100644 --- a/runner/run.cpp +++ b/runner/run.cpp @@ -39,19 +39,20 @@ torch::Device aoti_device(torch::kCPU); #else // __ET_MODEL__ #include -#include +#include #include #include #include #if defined(ET_USE_ADAPTIVE_THREADS) -#include -#include +#include +#include #endif using exec_aten::ScalarType; using torch::executor::EValue; -using torch::executor::ManagedTensor; +using executorch::extension::TensorPtr; +using executorch::extension::make_tensor_ptr; using torch::executor::Module; using torch::executor::Result; #endif @@ -212,11 +213,11 @@ float* forward(Transformer* transformer, int token, int pos) { .to(torch::kCPU); auto logits = result[0].data_ptr(); #else // __ET_MODEL__ - ManagedTensor pos_managed(pos_buffer, {1}, ScalarType::Long); - ManagedTensor tokens_managed(token_buffer, {1, 1}, ScalarType::Long); + TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer); //(pos_buffer, {1}, ScalarType::Long); + TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer); //(token_buffer, {1, 1}, ScalarType::Long); std::vector inputs; - auto tmp1 = EValue(tokens_managed.get_aliasing_tensor()); - auto tmp2 = EValue(pos_managed.get_aliasing_tensor()); + auto tmp1 = EValue(tokens_managed); //.get_aliasing_tensor()); + auto tmp2 = EValue(pos_managed); //.get_aliasing_tensor()); inputs.push_back(tmp1); inputs.push_back(tmp2); diff --git a/torchchat/export.py b/torchchat/export.py index affb8b871..b28e8023f 100644 --- a/torchchat/export.py +++ b/torchchat/export.py @@ -194,7 +194,7 @@ def forward(self, x, freqs_cis, mask, input_pos=None): return self.wo(output) def replace_attention_with_custom_sdpa_attention(module: nn.Module): - from executorch.examples.models.llama2.custom_ops import ( # noqa + 
from executorch.extension.llm.custom_ops import ( # noqa sdpa_with_kv_cache, ) @@ -304,7 +304,6 @@ def export_for_et(model, device, output_path) -> str: edge_manager = edge_manager.to_backend(XnnpackDynamicallyQuantizedPartitioner()) export_program = edge_manager.to_executorch( ExecutorchBackendConfig( - extract_constant_segment=True, extract_delegate_segments=True, passes=[ QuantFusionPass(), diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index eecd07884..d422f6ae0 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -77,12 +77,12 @@ git submodule sync if [[ "$TARGET" == "et" ]]; then find_cmake_prefix_path install_pip_dependencies - # clone_executorch + clone_executorch install_executorch_libs false if [[ "$LINK_TORCHAO" == "ON" ]]; then - EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" - EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a" + EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" + EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a" install_torchao_custom_executorch_ops fi fi diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 0102d7707..9ccf6a924 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -185,6 +185,7 @@ install_torchao_custom_aten_ops() { CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ + -DCMAKE_BUILD_TYPE="Release" \ -DTORCHAO_OP_TARGET="ATEN" \ -S . 
\ -B ${CMAKE_OUT_DIR} -G Ninja @@ -200,9 +201,10 @@ install_torchao_custom_executorch_ops() { CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out" cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ + -DCMAKE_BUILD_TYPE="Release" \ + -DTORCHAO_OP_TARGET="EXECUTORCH" \ -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \ -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \ - -DTORCHAO_OP_TARGET="EXECUTORCH" \ -S . \ -B ${CMAKE_OUT_DIR} -G Ninja cmake --build ${CMAKE_OUT_DIR} --target install --config Release From d5dded2261b49ccd8e1e934ebd1c51fc85346c4f Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 18 Sep 2024 13:52:04 -0700 Subject: [PATCH 09/39] remove old comment --- runner/run.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/runner/run.cpp b/runner/run.cpp index 455f9e076..99eb7bfb9 100644 --- a/runner/run.cpp +++ b/runner/run.cpp @@ -213,11 +213,11 @@ float* forward(Transformer* transformer, int token, int pos) { .to(torch::kCPU); auto logits = result[0].data_ptr(); #else // __ET_MODEL__ - TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer); //(pos_buffer, {1}, ScalarType::Long); - TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer); //(token_buffer, {1, 1}, ScalarType::Long); + TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer); + TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer); std::vector inputs; - auto tmp1 = EValue(tokens_managed); //.get_aliasing_tensor()); - auto tmp2 = EValue(pos_managed); //.get_aliasing_tensor()); + auto tmp1 = EValue(tokens_managed); + auto tmp2 = EValue(pos_managed); inputs.push_back(tmp1); inputs.push_back(tmp2); From 33450893d1629f644a6ba084e7f870ebc2485f01 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 18 Sep 2024 14:02:59 -0700 Subject: 
[PATCH 10/39] add dylib for poor perf repro testing --- runner/et.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/runner/et.cmake b/runner/et.cmake index 7715ce656..12c7fca02 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -113,6 +113,7 @@ if(executorch_FOUND) endif() if(LINK_TORCHAO_CUSTOM_OPS) + # target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}") target_link_libraries(et_run PRIVATE "$") target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a" From bf1e72751ee3689796880c911b9737c72b283fd7 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 18 Sep 2024 16:07:51 -0700 Subject: [PATCH 11/39] change /Users/scroy to ${HOME} --- torchchat/utils/scripts/install_utils.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 9ccf6a924..e4bbaadd3 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -170,7 +170,7 @@ clone_torchao() { pushd ${TORCHCHAT_ROOT}/torchao-build/src echo $pwd - cp -R /Users/scroy/fbsource/fbcode/pytorch/ao . + cp -R ${HOME}/fbsource/fbcode/pytorch/ao . 
# git clone https://github.com/pytorch/ao.git # cd ao # git checkout $(cat ${TORCHCHAT_ROOT}/intstall/.pins/torchao-experimental-pin.txt) From 0d5da6785d7b158f4078c4e41cc2bc742e184979 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Wed, 18 Sep 2024 16:21:30 -0700 Subject: [PATCH 12/39] remove single-threaded reference in docs --- docs/quantization.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/quantization.md b/docs/quantization.md index 6245e8b6d..aea8a8dc6 100644 --- a/docs/quantization.md +++ b/docs/quantization.md @@ -178,7 +178,6 @@ python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"lin ``` Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file. -Also note that the ExecuTorch op that wraps the new torchao kernel is currently single threaded. ## Quantization Profiles From 8ee9d0e85b7be6fdc34307a9e695b359cb52cbad Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 09:23:46 -0700 Subject: [PATCH 13/39] add github workflow for testing --- .github/workflows/pull.yml | 100 +++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index f42a20e22..d7d1f5184 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -1035,3 +1035,103 @@ jobs: git submodule update --init ./runner/build_android.sh echo "Tests complete." 
+ + test-torchao-experimental: + strategy: + matrix: + runner: [macos-14-xlarge] + runs-on: ${{matrix.runner}} + steps: + - name: Checkout repo + uses: actions/checkout@v3 + with: + submodules: true + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.10.11 + - name: Setup Xcode + if: runner.os == 'macOS' + uses: maxim-lobanov/setup-xcode@v1 + with: + xcode-version: '15.3' + - name: Print machine info + run: | + uname -a + if [ $(uname -s) == Darwin ]; then + sysctl machdep.cpu.brand_string + sysctl machdep.cpu.core_count + fi + - name: Install torchchat + run: | + echo "Intalling pip3 packages" + ./install/install_requirements.sh + pip3 list + python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' + - name: Install torchao-experimental + id: install-torchao-experimental + run: | + bash torchchat/utils/scripts/build_torchao_experimental.sh + - name: Set git shas + id: setup-hash + run: | + export TORCHCHAT_ROOT=${PWD} + echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV" + - name: Load or install ET + id: install-et + uses: actions/cache@v3 + env: + cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}} + with: + path: ./et-build + key: ${{env.cache-key}} + restore-keys: | + ${{env.cache-key}} + - if: ${{ steps.install-et.outputs.cache-hit != 'true' }} + continue-on-error: true + run: | + echo "Installing ExecuTorch" + bash torchchat/utils/scripts/build_native.sh et link_torchao + - name: Install ET pip + run: | + echo "ET build directory" + ls et-build | cat + + pushd et-build/src/executorch + if [ $(git rev-parse HEAD) != ${{env.et-git-hash}} ]; then + echo "Mismatched hash. Make sure branch install_et.sh matches branch from Github cache." + echo "On commit $(git rev-parse HEAD)" + echo "Expected commit ${{env.et-git-hash}}" + exit 1 + fi + pip install . 
+ popd + - name: Install runner AOTI + id: install-runner-aoti + run: | + bash torchchat/utils/scripts/build_native.sh aoti link_torchao + - name: Run inference + run: | + python torchchat.py download stories110M + wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model + + export PRMT="Once upon a time in a land far away" + + echo "Generate eager" + python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' + + echo "Generate compile" + python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile + + echo "Export and run ET (C++ runner)" + python torchchat.py export stories110M --output-pte-path ./model.pte --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' + ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}" + + echo "Export and run AOTI (C++ runner)" + python torchchat.py export stories110M --output-dso-path ./model.so --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' + ./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}" + + echo "Generate AOTI" + python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}" + + echo "Tests complete." 
From 3bd1389a7e6cd2761a4fae7f11768ee4baecd816 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 09:52:31 -0700 Subject: [PATCH 14/39] fix torchchat-root in install script --- install/requirements.txt | 2 +- torchchat/utils/scripts/build_native.sh | 10 +--------- torchchat/utils/scripts/install_utils.sh | 4 ++-- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/install/requirements.txt b/install/requirements.txt index bbb1d56d1..bc4592d44 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -12,7 +12,7 @@ tiktoken # Miscellaneous snakeviz sentencepiece -numpy < 2.0 +numpy==1.23.5 gguf lm-eval==0.4.2 blobfile diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index d422f6ae0..85c3cd4c4 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -56,15 +56,7 @@ while (( "$#" )); do esac done -if [ -z "${TORCHCHAT_ROOT}" ]; then - # Get the absolute path of the current script - SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - # Get the absolute path of the parent directory - TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")" - source "$TORCHCHAT_ROOT/scripts/install_utils.sh" -else - source "$TORCHCHAT_ROOT/torchchat/utils/scripts/install_utils.sh" -fi +source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" if [ -z "${ET_BUILD_DIR}" ]; then ET_BUILD_DIR="et-build" diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index e4bbaadd3..e6f8acde7 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -8,8 +8,8 @@ set -ex pipefail if [ -z "$TORCHCHAT_ROOT" ]; then - echo "Defaulting TORCHCHAT_ROOT to $PWD since it is unset." - TORCHCHAT_ROOT=$PWD + TORCHCHAT_ROOT="$(dirname "${BASH_SOURCE[0]}")/../../.." + echo "Defaulting TORCHCHAT_ROOT to $TORCHCHAT_ROOT since it is unset." 
fi install_pip_dependencies() { From 2241286329f9c19fed732700abb575940e9bac34 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:25:37 -0700 Subject: [PATCH 15/39] bug fixes --- install/requirements.txt | 2 +- torchchat/model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/install/requirements.txt b/install/requirements.txt index bc4592d44..935fa182b 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -12,7 +12,7 @@ tiktoken # Miscellaneous snakeviz sentencepiece -numpy==1.23.5 +numpy==1.21.3 gguf lm-eval==0.4.2 blobfile diff --git a/torchchat/model.py b/torchchat/model.py index 79bd1f188..ea7710a27 100644 --- a/torchchat/model.py +++ b/torchchat/model.py @@ -932,7 +932,7 @@ def apply_rotary_emb(x: Tensor, freqs_cis: Tensor) -> Tensor: from executorch.extension.pybindings import portable_lib as exec_lib # ET changed the way it's loading the custom ops so it's not included in portable_lib but has to be loaded separately. 
- from executorch.examples.models.llama2.custom_ops import sdpa_with_kv_cache # no-qa + from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # no-qa class PTEModel(nn.Module): def __init__(self, config, path) -> None: From 1803d233e459934c03e14821339ecb774e992e8f Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:49:17 -0700 Subject: [PATCH 16/39] ci fixes --- install/requirements.txt | 2 +- torchchat/utils/scripts/install_utils.sh | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/install/requirements.txt b/install/requirements.txt index 935fa182b..7bb3b74b5 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -12,7 +12,7 @@ tiktoken # Miscellaneous snakeviz sentencepiece -numpy==1.21.3 +numpy>=1.23.5,<2.0 gguf lm-eval==0.4.2 blobfile diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index e6f8acde7..83a52446a 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -8,7 +8,9 @@ set -ex pipefail if [ -z "$TORCHCHAT_ROOT" ]; then - TORCHCHAT_ROOT="$(dirname "${BASH_SOURCE[0]}")/../../.." + # Get the absolute path of the current script + SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" + TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")/../../.." echo "Defaulting TORCHCHAT_ROOT to $TORCHCHAT_ROOT since it is unset." 
fi From c5173cd66e7ff8b675e8a44db7b1483c7ec254d9 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:00:43 -0700 Subject: [PATCH 17/39] fix --- torchchat/utils/scripts/install_utils.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 83a52446a..50e498ae8 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -10,7 +10,7 @@ set -ex pipefail if [ -z "$TORCHCHAT_ROOT" ]; then # Get the absolute path of the current script SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" - TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")/../../.." + TORCHCHAT_ROOT="$SCRIPT_PATH/../../.." echo "Defaulting TORCHCHAT_ROOT to $TORCHCHAT_ROOT since it is unset." fi From fb96a394220a57adfbc0c18b301ac5b489cff554 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 11:56:08 -0700 Subject: [PATCH 18/39] ci fixes --- .github/workflows/pull.yml | 31 ++++++------------------------- runner/build_android.sh | 1 + 2 files changed, 7 insertions(+), 25 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index d7d1f5184..93486d412 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -452,7 +452,6 @@ jobs: echo "Intalling pip3 packages" ./install/install_requirements.sh - export TORCHCHAT_ROOT=$PWD ./torchchat/utils/scripts/install_et.sh pip3 list @@ -621,6 +620,9 @@ jobs: python torchchat.py remove stories15m test-mps: + uses: actions/setup-python@v2 + with: + python-version: 3.10.11 uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-stable # neeps MPS, was macos-m1-stable @@ -733,6 +735,9 @@ jobs: echo "Tests complete." 
test-mps-dtype: + uses: actions/setup-python@v2 + with: + python-version: 3.10.11 uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-stable # needs MPS, was macos-m1-stable @@ -915,30 +920,6 @@ jobs: run: | echo "Installing ExecuTorch" bash torchchat/utils/scripts/build_native.sh et - - name: Install ET pip - run: | - echo "ET build directory" - ls et-build | cat - - pushd et-build/src/executorch - if [ $(git rev-parse HEAD) != ${{env.et-git-hash}} ]; then - echo "Mismatched hash. Make sure branch install_et.sh matches branch from Github cache." - echo "On commit $(git rev-parse HEAD)" - echo "Expected commit ${{env.et-git-hash}}" - exit 1 - fi - pip install . - popd - - name: Install runner - run: | - # Pull submodules (re2, abseil) for Tiktoken - git submodule sync - git submodule update --init - - export TORCHCHAT_ROOT=${PWD} - cmake -S . -B ./cmake-out -G Ninja - cmake --build ./cmake-out --target et_run - - name: Run inference run: | python torchchat.py download stories15M diff --git a/runner/build_android.sh b/runner/build_android.sh index c32185957..0d1d0201b 100755 --- a/runner/build_android.sh +++ b/runner/build_android.sh @@ -22,6 +22,7 @@ fi export ET_BUILD_DIR="et-build-android" export CMAKE_OUT_DIR="cmake-out-android" export EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT="OFF" +export EXECUTORCH_BUILD_EXTENSION_TENSOR="ON" export EXECUTORCH_BUILD_KERNELS_CUSTOM="ON" export CMAKE_OUT_DIR="cmake-out-android" # export DCMAKE_INSTALL_PREFIX=cmake-out-android From 4eb7bdae30b3621338c64888dabd6feca4017829 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 12:01:20 -0700 Subject: [PATCH 19/39] fix ci --- .github/workflows/pull.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 93486d412..f71b668b6 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -620,12 +620,10 @@ 
jobs: python torchchat.py remove stories15m test-mps: - uses: actions/setup-python@v2 - with: - python-version: 3.10.11 uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-stable # neeps MPS, was macos-m1-stable + python-version: 3.10.11 script: | set -x # NS/MC: Remove previous installation of torch and torchao first @@ -735,12 +733,10 @@ jobs: echo "Tests complete." test-mps-dtype: - uses: actions/setup-python@v2 - with: - python-version: 3.10.11 uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-stable # needs MPS, was macos-m1-stable + python-version: 3.10.11 script: | set -x # NS/MC: Remove previous installation of torch and torchao first From f569c4e27d2db9274be19cede9c5cbc8002cde43 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 12:37:24 -0700 Subject: [PATCH 20/39] ci fixes --- runner/build_android.sh | 3 --- torchchat/utils/scripts/install_utils.sh | 11 +++-------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/runner/build_android.sh b/runner/build_android.sh index 0d1d0201b..fab222a52 100755 --- a/runner/build_android.sh +++ b/runner/build_android.sh @@ -22,11 +22,8 @@ fi export ET_BUILD_DIR="et-build-android" export CMAKE_OUT_DIR="cmake-out-android" export EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT="OFF" -export EXECUTORCH_BUILD_EXTENSION_TENSOR="ON" export EXECUTORCH_BUILD_KERNELS_CUSTOM="ON" export CMAKE_OUT_DIR="cmake-out-android" -# export DCMAKE_INSTALL_PREFIX=cmake-out-android -# build_runner_et() { rm -rf cmake-out-android diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 50e498ae8..29be634f4 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -104,7 +104,8 @@ COMMON_CMAKE_ARGS="\ -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ 
- -DEXECUTORCH_BUILD_XNNPACK=ON" + -DEXECUTORCH_BUILD_XNNPACK=ON \ + -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON" install_executorch() { # AOT lib has to be build for model export @@ -155,13 +156,7 @@ install_executorch() { } install_executorch_libs() { - # Install executorch python and C++ libs - export CMAKE_ARGS="\ - ${COMMON_CMAKE_ARGS} \ - -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ - -DCMAKE_INSTALL_PREFIX=${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install" - export CMAKE_BUILD_ARGS="--target install" - + install_executorch install_executorch_python_libs $1 } From d7fefb91321151e1e8b077d441b69efc37adeee5 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 12:47:45 -0700 Subject: [PATCH 21/39] ci update --- .github/workflows/pull.yml | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index f71b668b6..38de14d25 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -916,6 +916,9 @@ jobs: run: | echo "Installing ExecuTorch" bash torchchat/utils/scripts/build_native.sh et + - name: Install runner + run: | + bash torchchat/utils/scripts/build_native.sh et - name: Run inference run: | python torchchat.py download stories15M @@ -1069,20 +1072,9 @@ jobs: run: | echo "Installing ExecuTorch" bash torchchat/utils/scripts/build_native.sh et link_torchao - - name: Install ET pip + - name: Install runner run: | - echo "ET build directory" - ls et-build | cat - - pushd et-build/src/executorch - if [ $(git rev-parse HEAD) != ${{env.et-git-hash}} ]; then - echo "Mismatched hash. Make sure branch install_et.sh matches branch from Github cache." - echo "On commit $(git rev-parse HEAD)" - echo "Expected commit ${{env.et-git-hash}}" - exit 1 - fi - pip install . 
- popd + bash torchchat/utils/scripts/build_native.sh et link_torchao - name: Install runner AOTI id: install-runner-aoti run: | From 0d5a97bfe5a6ecf82a31b982eeba20527e760556 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 13:00:16 -0700 Subject: [PATCH 22/39] ci fixes --- .github/workflows/pull.yml | 6 ++++-- torchchat/utils/scripts/build_native.sh | 14 +++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 38de14d25..169aa6ba5 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -918,7 +918,8 @@ jobs: bash torchchat/utils/scripts/build_native.sh et - name: Install runner run: | - bash torchchat/utils/scripts/build_native.sh et + echo "Installing runner" + bash torchchat/utils/scripts/build_native.sh et skip_et_install - name: Run inference run: | python torchchat.py download stories15M @@ -1074,7 +1075,8 @@ jobs: bash torchchat/utils/scripts/build_native.sh et link_torchao - name: Install runner run: | - bash torchchat/utils/scripts/build_native.sh et link_torchao + echo "Installing runner" + bash torchchat/utils/scripts/build_native.sh et skip_et_install link_torchao - name: Install runner AOTI id: install-runner-aoti run: | diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index 85c3cd4c4..48396ba80 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -27,6 +27,7 @@ if [ $# -eq 0 ]; then fi LINK_TORCHAO=OFF +SKIP_ET_INSTALL=OFF while (( "$#" )); do case "$1" in -h|--help) @@ -48,6 +49,11 @@ while (( "$#" )); do LINK_TORCHAO=ON shift ;; + skip_et_install) + echo "Skipping ET install..." 
+ SKIP_ET_INSTALL=ON + shift + ;; *) echo "Invalid option: $1" show_help @@ -68,9 +74,11 @@ git submodule update --init git submodule sync if [[ "$TARGET" == "et" ]]; then find_cmake_prefix_path - install_pip_dependencies - clone_executorch - install_executorch_libs false + if [[ "$SKIP_ET_INSTALL" == "OFF" ]]; then + install_pip_dependencies + clone_executorch + install_executorch_libs false + fi if [[ "$LINK_TORCHAO" == "ON" ]]; then EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" From a3197091e610ff4284676b8acb0e37097262ba75 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 13:10:26 -0700 Subject: [PATCH 23/39] ci fixes --- .github/workflows/pull.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 169aa6ba5..438b4cdd7 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -916,6 +916,11 @@ jobs: run: | echo "Installing ExecuTorch" bash torchchat/utils/scripts/build_native.sh et + - name: Install ExecuTorch python + run: | + echo "Install ExecuTorch python" + source "torchchat/utils/scripts/install_utils.sh" + install_executorch_python_libs - name: Install runner run: | echo "Installing runner" From e5c671d338644f959c73e50ff588aafee0c963ce Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 13:47:54 -0700 Subject: [PATCH 24/39] ci fixes --- .github/workflows/pull.yml | 9 ++++--- runner/build_android.sh | 2 +- torchchat/utils/scripts/build_native.sh | 31 ++++++++++++------------ torchchat/utils/scripts/install_utils.sh | 4 +-- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 438b4cdd7..40b567e7b 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -915,16 +915,17 @@ jobs: continue-on-error: true 
run: | echo "Installing ExecuTorch" - bash torchchat/utils/scripts/build_native.sh et + bash torchchat/utils/scripts/install_et.sh - name: Install ExecuTorch python run: | echo "Install ExecuTorch python" + export TORCHCHAT_ROOT=$PWD source "torchchat/utils/scripts/install_utils.sh" install_executorch_python_libs - name: Install runner run: | echo "Installing runner" - bash torchchat/utils/scripts/build_native.sh et skip_et_install + bash torchchat/utils/scripts/build_native.sh et - name: Run inference run: | python torchchat.py download stories15M @@ -1077,11 +1078,11 @@ jobs: continue-on-error: true run: | echo "Installing ExecuTorch" - bash torchchat/utils/scripts/build_native.sh et link_torchao + bash torchchat/utils/scripts/install_et.sh - name: Install runner run: | echo "Installing runner" - bash torchchat/utils/scripts/build_native.sh et skip_et_install link_torchao + bash torchchat/utils/scripts/build_native.sh et link_torchao - name: Install runner AOTI id: install-runner-aoti run: | diff --git a/runner/build_android.sh b/runner/build_android.sh index fab222a52..c0ad02d7b 100755 --- a/runner/build_android.sh +++ b/runner/build_android.sh @@ -41,5 +41,5 @@ install_executorch_python_libs $ENABLE_ET_PYBIND export CMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake export ANDROID_ABI=arm64-v8a export ANDROID_PLATFORM=android-23 -install_executorch +install_executorch_cpp_libs build_runner_et diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index 48396ba80..3f2984574 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -27,7 +27,6 @@ if [ $# -eq 0 ]; then fi LINK_TORCHAO=OFF -SKIP_ET_INSTALL=OFF while (( "$#" )); do case "$1" in -h|--help) @@ -49,11 +48,6 @@ while (( "$#" )); do LINK_TORCHAO=ON shift ;; - skip_et_install) - echo "Skipping ET install..." 
- SKIP_ET_INSTALL=ON - shift - ;; *) echo "Invalid option: $1" show_help @@ -73,18 +67,23 @@ pushd ${TORCHCHAT_ROOT} git submodule update --init git submodule sync if [[ "$TARGET" == "et" ]]; then - find_cmake_prefix_path - if [[ "$SKIP_ET_INSTALL" == "OFF" ]]; then - install_pip_dependencies - clone_executorch - install_executorch_libs false - fi + if [ ! -d "${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install" ]; then + echo "Directory ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install does not exist." + echo "Make sure you run install_executorch_libs" + exit 1 + fi - if [[ "$LINK_TORCHAO" == "ON" ]]; then - EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" - EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a" - install_torchao_custom_executorch_ops + if [[ "$LINK_TORCHAO" == "ON" ]]; then + if [ ! -d "${TORCHCHAT_ROOT}/torchao-build" ]; then + echo "Directory ${TORCHCHAT_ROOT}/torchao-build does not exist." 
+ echo "Make sure you run clone_torchao" + exit 1 fi + find_cmake_prefix_path + EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" + EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a" + install_torchao_custom_executorch_ops + fi fi popd diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 29be634f4..265332861 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -107,7 +107,7 @@ COMMON_CMAKE_ARGS="\ -DEXECUTORCH_BUILD_XNNPACK=ON \ -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON" -install_executorch() { +install_executorch_cpp_libs() { # AOT lib has to be build for model export # So by default it is built, and you can explicitly opt-out EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR=OFF @@ -156,7 +156,7 @@ install_executorch() { } install_executorch_libs() { - install_executorch + install_executorch_cpp_libs install_executorch_python_libs $1 } From 9b7a1975b1da86451dd4b2d00f067f94184131c3 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 14:03:48 -0700 Subject: [PATCH 25/39] ci fixes --- .github/workflows/pull.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 40b567e7b..c29ffec6d 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -452,6 +452,7 @@ jobs: echo "Intalling pip3 packages" ./install/install_requirements.sh + export TORCHCHAT_ROOT=$PWD ./torchchat/utils/scripts/install_et.sh pip3 list From c7984c63e5a7de3fdff2c49f86488e37a9bf4f79 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 
19 Sep 2024 14:11:41 -0700 Subject: [PATCH 26/39] ci update --- .github/workflows/pull.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index c29ffec6d..6f123114a 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -920,9 +920,9 @@ jobs: - name: Install ExecuTorch python run: | echo "Install ExecuTorch python" - export TORCHCHAT_ROOT=$PWD - source "torchchat/utils/scripts/install_utils.sh" - install_executorch_python_libs + pushd et-build/src/executorch + sh ./install_requirements.sh + popd - name: Install runner run: | echo "Installing runner" From 832e96bb45a6ff0f248daba8da54afba672d1d63 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 14:32:47 -0700 Subject: [PATCH 27/39] update et pin --- .github/workflows/pull.yml | 4 +++- install/.pins/et-pin.txt | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 6f123114a..987394222 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -921,7 +921,9 @@ jobs: run: | echo "Install ExecuTorch python" pushd et-build/src/executorch - sh ./install_requirements.sh + chmod +x ./install_requirements.sh + chmod +x ./install_requirements.py + ./install_requirements.sh popd - name: Install runner run: | diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt index 0a15fd2b5..af7ef4377 100644 --- a/install/.pins/et-pin.txt +++ b/install/.pins/et-pin.txt @@ -1 +1 @@ -58700faa262ddf45b223353c120ffaf6b2003711 +c75711cb329cab3df91fb9083a18373f9a568377 From db4e77ce0523251a69b74e94de18c5763d108157 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 14:38:06 -0700 Subject: [PATCH 28/39] update python version --- .github/workflows/pull.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff 
--git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 987394222..5da6c86e8 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -434,7 +434,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: 3.10.11 + python-version: '3.10.11' - name: Setup Xcode if: runner.os == 'macOS' uses: maxim-lobanov/setup-xcode@v1 @@ -577,7 +577,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: 3.10.11 + python-version: '3.10.11' - name: Print machine info run: | uname -a @@ -624,7 +624,7 @@ jobs: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-stable # neeps MPS, was macos-m1-stable - python-version: 3.10.11 + python-version: '3.10' script: | set -x # NS/MC: Remove previous installation of torch and torchao first @@ -737,7 +737,7 @@ jobs: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-stable # needs MPS, was macos-m1-stable - python-version: 3.10.11 + python-version: '3.10' script: | set -x # NS/MC: Remove previous installation of torch and torchao first @@ -878,7 +878,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: 3.10.11 + python-version: 3.10 - name: Setup Xcode if: runner.os == 'macOS' uses: maxim-lobanov/setup-xcode@v1 From 461be776a03ccdba2e7c20ee36a5010cee11a0e4 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 14:42:06 -0700 Subject: [PATCH 29/39] update python version --- .github/workflows/pull.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 5da6c86e8..47d1a8d87 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -624,8 +624,8 @@ jobs: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-stable # neeps MPS, was macos-m1-stable - python-version: '3.10' 
script: | + export PYTHON_VERSION="3.10" set -x # NS/MC: Remove previous installation of torch and torchao first # as this script does not install anything into conda env but rather as system dep @@ -737,8 +737,8 @@ jobs: uses: pytorch/test-infra/.github/workflows/macos_job.yml@main with: runner: macos-m1-stable # needs MPS, was macos-m1-stable - python-version: '3.10' script: | + export PYTHON_VERSION="3.10" set -x # NS/MC: Remove previous installation of torch and torchao first # as this script does not install anything into conda env but rather as system dep @@ -878,7 +878,7 @@ jobs: - name: Setup Python uses: actions/setup-python@v2 with: - python-version: 3.10 + python-version: 3.10.11 - name: Setup Xcode if: runner.os == 'macOS' uses: maxim-lobanov/setup-xcode@v1 From eb58bf4be30b12b3ef754e8662161ccd4df12411 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:06:08 -0700 Subject: [PATCH 30/39] init --- .github/workflows/pull.yml | 90 ------------------- docs/quantization.md | 60 ------------- install/.pins/torchao-experimental-pin.txt | 1 - runner/aoti.cmake | 4 - runner/et.cmake | 24 ----- torchchat/utils/quantize.py | 47 +--------- torchchat/utils/scripts/build_native.sh | 22 +---- .../scripts/build_torchao_experimental.sh | 16 ---- torchchat/utils/scripts/install_utils.sh | 49 ---------- 9 files changed, 6 insertions(+), 307 deletions(-) delete mode 100644 install/.pins/torchao-experimental-pin.txt delete mode 100644 torchchat/utils/scripts/build_torchao_experimental.sh diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml index 47d1a8d87..87e0825b3 100644 --- a/.github/workflows/pull.yml +++ b/.github/workflows/pull.yml @@ -1025,93 +1025,3 @@ jobs: git submodule update --init ./runner/build_android.sh echo "Tests complete." 
- - test-torchao-experimental: - strategy: - matrix: - runner: [macos-14-xlarge] - runs-on: ${{matrix.runner}} - steps: - - name: Checkout repo - uses: actions/checkout@v3 - with: - submodules: true - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: 3.10.11 - - name: Setup Xcode - if: runner.os == 'macOS' - uses: maxim-lobanov/setup-xcode@v1 - with: - xcode-version: '15.3' - - name: Print machine info - run: | - uname -a - if [ $(uname -s) == Darwin ]; then - sysctl machdep.cpu.brand_string - sysctl machdep.cpu.core_count - fi - - name: Install torchchat - run: | - echo "Intalling pip3 packages" - ./install/install_requirements.sh - pip3 list - python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")' - - name: Install torchao-experimental - id: install-torchao-experimental - run: | - bash torchchat/utils/scripts/build_torchao_experimental.sh - - name: Set git shas - id: setup-hash - run: | - export TORCHCHAT_ROOT=${PWD} - echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV" - - name: Load or install ET - id: install-et - uses: actions/cache@v3 - env: - cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}} - with: - path: ./et-build - key: ${{env.cache-key}} - restore-keys: | - ${{env.cache-key}} - - if: ${{ steps.install-et.outputs.cache-hit != 'true' }} - continue-on-error: true - run: | - echo "Installing ExecuTorch" - bash torchchat/utils/scripts/install_et.sh - - name: Install runner - run: | - echo "Installing runner" - bash torchchat/utils/scripts/build_native.sh et link_torchao - - name: Install runner AOTI - id: install-runner-aoti - run: | - bash torchchat/utils/scripts/build_native.sh aoti link_torchao - - name: Run inference - run: | - python torchchat.py download stories110M - wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model - - export PRMT="Once upon a time in a land far away" - - echo "Generate 
eager" - python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' - - echo "Generate compile" - python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}" --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile - - echo "Export and run ET (C++ runner)" - python torchchat.py export stories110M --output-pte-path ./model.pte --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' - ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}" - - echo "Export and run AOTI (C++ runner)" - python torchchat.py export stories110M --output-dso-path ./model.so --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' - ./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}" - - echo "Generate AOTI" - python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}" - - echo "Tests complete." diff --git a/docs/quantization.md b/docs/quantization.md index aea8a8dc6..bac6e12cc 100644 --- a/docs/quantization.md +++ b/docs/quantization.md @@ -118,66 +118,6 @@ python3 torchchat.py export llama3 --quantize '{"embedding": {"bitwidth": 4, "gr python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my name is" ``` -## Experimental TorchAO lowbit kernels - -### Use -The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize. -It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false). -The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true). 
-Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q40 quantization scheme. - -You should expect high performance on ARM CPU if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16. With other platforms and argument choices, a slow fallback kernel will be used. You will see warnings about this during quantization. - -### Setup -To use a8wxdq, you must set up the torchao experimental kernels. These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon. - -From the torchchat root directory, run -``` -sh torchchat/utils/scripts/build_torchao_experimental.sh -``` - -This should take about 10 seconds to complete. Once finished, you can use a8wxdq in torchchat. - -Note: if you want to use the new kernels in the AOTI and C++ runners, you must pass the flag link_torchao when running the scripts the build the runners. - -``` -sh torchchat/utils/scripts/build_native.sh aoti link_torchao -``` - -``` -sh torchchat/utils/scripts/build_native.sh et link_torchao -``` - -### Examples - -#### Eager mode -``` -python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' -``` - -#### torch.compile -``` -python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile -``` - -As with PyTorch in general, you can experiment with performance on a difference number of threads by defining OMP_NUM_THREADS. 
For example, - -``` -OMP_NUM_THREADS=6 python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile -``` - -#### AOTI -``` -python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-dso llama3.so -python3 torchchat.py generate llama3 --dso-path llama3_1.so --prompt "Hello my name is" -``` - -#### ExecuTorch -``` -python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-pte llama3.pte -``` - -Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file. ## Quantization Profiles diff --git a/install/.pins/torchao-experimental-pin.txt b/install/.pins/torchao-experimental-pin.txt deleted file mode 100644 index 9b101777d..000000000 --- a/install/.pins/torchao-experimental-pin.txt +++ /dev/null @@ -1 +0,0 @@ -3fa38aaf1276e36845a82fb399e5054718a441c4 diff --git a/runner/aoti.cmake b/runner/aoti.cmake index 35e4c1329..156e9bcce 100644 --- a/runner/aoti.cmake +++ b/runner/aoti.cmake @@ -28,7 +28,3 @@ if(Torch_FOUND) target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m) set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17) endif() - -if (LINK_TORCHAO_CUSTOM_OPS) - target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}") -endif() diff --git a/runner/et.cmake b/runner/et.cmake index 12c7fca02..27d799873 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -112,30 +112,6 @@ if(executorch_FOUND) target_link_libraries(et_run PRIVATE log) endif() - if(LINK_TORCHAO_CUSTOM_OPS) - # target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}") 
- target_link_libraries(et_run PRIVATE "$") - target_link_libraries(et_run PRIVATE - "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a" - "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a" - ) - endif() - - # Adding target_link_options_shared_lib as commented out below leads to this: - # - # CMake Error at Utils.cmake:22 (target_link_options): - # Cannot specify link options for target - # "/Users/scroy/etorch/torchchat/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a" - # which is not built by this project. - # Call Stack (most recent call first): - # Utils.cmake:30 (macos_kernel_link_options) - # CMakeLists.txt:41 (target_link_options_shared_lib) - # - #target_link_options_shared_lib("${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a") # This one does not get installed by ExecuTorch - - # This works on mac, but appears to run into issues on linux - # It is needed to solve: - # E 00:00:00.055965 executorch:method.cpp:536] Missing operator: [8] llama::sdpa_with_kv_cache.out else() MESSAGE(WARNING "ExecuTorch package not found") endif() diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index 041f074c2..a0d9248a9 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py @@ -96,19 +96,10 @@ def quantize_model( precision = get_precision() try: - if quantizer == "linear:a8wxdq": - quant_handler = ao_quantizer_class_dict[quantizer]( - device=device, - precision=precision, - bitwidth=q_kwargs.get("bitwidth", 4), - groupsize=q_kwargs.get("groupsize", 128), - has_weight_zeros=q_kwargs.get("has_weight_zeros", False), - ) - else: - # Easier to ask forgiveness than permission - quant_handler = ao_quantizer_class_dict[quantizer]( - groupsize=q_kwargs["groupsize"], device=device, precision=precision - ) + # Easier to ask forgiveness than permission + quant_handler = 
ao_quantizer_class_dict[quantizer]( + groupsize=q_kwargs["groupsize"], device=device, precision=precision + ) except TypeError as e: if "unexpected keyword argument 'device'" in str(e): quant_handler = ao_quantizer_class_dict[quantizer]( @@ -870,33 +861,3 @@ def quantized_model(self) -> nn.Module: "linear:int4": Int4WeightOnlyQuantizer, "linear:a8w4dq": Int8DynActInt4WeightQuantizer, } - -try: - import importlib.util - import sys - import os - torchao_build_path = f"{os.getcwd()}/torchao-build" - - # Try loading quantizer - torchao_experimental_quant_api_spec = importlib.util.spec_from_file_location( - "torchao_experimental_quant_api", - f"{torchao_build_path}/src/ao/torchao/experimental/quant_api.py", - ) - torchao_experimental_quant_api = importlib.util.module_from_spec(torchao_experimental_quant_api_spec) - sys.modules["torchao_experimental_quant_api"] = torchao_experimental_quant_api - torchao_experimental_quant_api_spec.loader.exec_module(torchao_experimental_quant_api) - from torchao_experimental_quant_api import Int8DynActIntxWeightQuantizer - ao_quantizer_class_dict["linear:a8wxdq"] = Int8DynActIntxWeightQuantizer - - # Try loading custom op - try: - import glob - libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*") - libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs)) - torch.ops.load_library(libs[0]) - except Exception as e: - print("Failed to torchao custom op library with error: ", e) - print("Slow fallback kernels will be used.") - -except Exception as e: - print(f"Failed to load torchao experimental a8wxdq quantizer with error: {e}") diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh index 3f2984574..924b86a65 100755 --- a/torchchat/utils/scripts/build_native.sh +++ b/torchchat/utils/scripts/build_native.sh @@ -26,7 +26,6 @@ if [ $# -eq 0 ]; then exit 1 fi -LINK_TORCHAO=OFF while (( "$#" )); do case "$1" in -h|--help) @@ -43,11 +42,6 @@ while (( "$#" )); 
do TARGET="et" shift ;; - link_torchao) - echo "Linking with torchao custom ops..." - LINK_TORCHAO=ON - shift - ;; *) echo "Invalid option: $1" show_help @@ -72,26 +66,14 @@ if [[ "$TARGET" == "et" ]]; then echo "Make sure you run install_executorch_libs" exit 1 fi - - if [[ "$LINK_TORCHAO" == "ON" ]]; then - if [ ! -d "${TORCHCHAT_ROOT}/torchao-build" ]; then - echo "Directory ${TORCHCHAT_ROOT}/torchao-build does not exist." - echo "Make sure you run clone_torchao" - exit 1 - fi - find_cmake_prefix_path - EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src" - EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a" - install_torchao_custom_executorch_ops - fi fi popd # CMake commands if [[ "$TARGET" == "et" ]]; then - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja + cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja else - cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja + cmake -S . 
-B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja fi cmake --build ./cmake-out --target "${TARGET}"_run diff --git a/torchchat/utils/scripts/build_torchao_experimental.sh b/torchchat/utils/scripts/build_torchao_experimental.sh deleted file mode 100644 index 1df3e80c6..000000000 --- a/torchchat/utils/scripts/build_torchao_experimental.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - - - -source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh" - -pushd ${TORCHCHAT_ROOT} -find_cmake_prefix_path -clone_torchao -install_torchao_custom_aten_ops -popd diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 265332861..fafc8eccd 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -159,52 +159,3 @@ install_executorch_libs() { install_executorch_cpp_libs install_executorch_python_libs $1 } - -clone_torchao() { - echo "Cloning torchao to ${TORCHCHAT_ROOT}/torchao-build/src" - rm -rf ${TORCHCHAT_ROOT}/torchao-build/src - mkdir -p ${TORCHCHAT_ROOT}/torchao-build/src - pushd ${TORCHCHAT_ROOT}/torchao-build/src - echo $pwd - - cp -R ${HOME}/fbsource/fbcode/pytorch/ao . 
- # git clone https://github.com/pytorch/ao.git - # cd ao - # git checkout $(cat ${TORCHCHAT_ROOT}/intstall/.pins/torchao-experimental-pin.txt) - - popd -} - -install_torchao_custom_aten_ops() { - echo "Building torchao custom ops for ATen" - pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental - - CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out - cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ - -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ - -DCMAKE_BUILD_TYPE="Release" \ - -DTORCHAO_OP_TARGET="ATEN" \ - -S . \ - -B ${CMAKE_OUT_DIR} -G Ninja - cmake --build ${CMAKE_OUT_DIR} --target install --config Release - - popd -} - -install_torchao_custom_executorch_ops() { - echo "Building torchao custom ops for ExecuTorch" - pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental - - CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out" - cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ - -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \ - -DCMAKE_BUILD_TYPE="Release" \ - -DTORCHAO_OP_TARGET="EXECUTORCH" \ - -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \ - -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \ - -S . 
\ - -B ${CMAKE_OUT_DIR} -G Ninja - cmake --build ${CMAKE_OUT_DIR} --target install --config Release - - popd -} From cb2238d5fefbeccef716008b484db83c35819e35 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 15:09:23 -0700 Subject: [PATCH 31/39] typo --- .gitignore | 1 - docs/quantization.md | 1 - 2 files changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index ee856fcd2..3f25b76c0 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,6 @@ __pycache__/ # Build directories build/android/* et-build/* -torchao-build/* runner-et/cmake-out/* runner-aoti/cmake-out/* cmake-out/ diff --git a/docs/quantization.md b/docs/quantization.md index bac6e12cc..1f619e58e 100644 --- a/docs/quantization.md +++ b/docs/quantization.md @@ -118,7 +118,6 @@ python3 torchchat.py export llama3 --quantize '{"embedding": {"bitwidth": 4, "gr python3 torchchat.py generate llama3 --pte-path llama3.pte --prompt "Hello my name is" ``` - ## Quantization Profiles Four [sample profiles](https://github.com/pytorch/torchchat/tree/main/torchchat/quant_config/) are included with the torchchat distribution: `cuda.json`, `desktop.json`, `mobile.json`, `pi5.json` From d974f5008210c7123d75b271b4040afb2a252622 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 16:39:57 -0700 Subject: [PATCH 32/39] update pytorch pin --- install/.pins/et-pin.txt | 2 +- install/install_requirements.sh | 4 ++-- torchchat/utils/scripts/install_utils.sh | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt index af7ef4377..01c77f102 100644 --- a/install/.pins/et-pin.txt +++ b/install/.pins/et-pin.txt @@ -1 +1 @@ -c75711cb329cab3df91fb9083a18373f9a568377 +af098c31b6f8d5f38e40a5cf35784b0969d97df8 diff --git a/install/install_requirements.sh b/install/install_requirements.sh index b698315ff..47fd5b36d 100755 --- 
a/install/install_requirements.sh +++ b/install/install_requirements.sh @@ -47,10 +47,10 @@ fi # NOTE: If a newly-fetched version of the executorch repo changes the value of # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary # package versions. -PYTORCH_NIGHTLY_VERSION=dev20240814 +PYTORCH_NIGHTLY_VERSION=dev20240901 # Nightly version for torchvision -VISION_NIGHTLY_VERSION=dev20240814 +VISION_NIGHTLY_VERSION=dev20240901 # Nightly version for torchtune TUNE_NIGHTLY_VERSION=dev20240916 diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index fafc8eccd..06d978cfa 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -75,6 +75,7 @@ clone_executorch() { clone_executorch_internal } + install_executorch_python_libs() { if [ ! -d "${TORCHCHAT_ROOT}/${ET_BUILD_DIR}" ]; then echo "Directory ${TORCHCHAT_ROOT}/${ET_BUILD_DIR} does not exist." From efe74b8dc8f2401e4cd2d4d79a268749b1a922d4 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Thu, 19 Sep 2024 17:05:14 -0700 Subject: [PATCH 33/39] tensor_ptr arg order change --- runner/run.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/runner/run.cpp b/runner/run.cpp index 99eb7bfb9..52d596749 100644 --- a/runner/run.cpp +++ b/runner/run.cpp @@ -213,8 +213,8 @@ float* forward(Transformer* transformer, int token, int pos) { .to(torch::kCPU); auto logits = result[0].data_ptr(); #else // __ET_MODEL__ - TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer); - TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer); + TensorPtr pos_managed = make_tensor_ptr({1}, pos_buffer, ScalarType::Long); + TensorPtr tokens_managed = make_tensor_ptr({1, 1}, token_buffer, ScalarType::Long); std::vector inputs; auto tmp1 = EValue(tokens_managed); auto tmp2 = EValue(pos_managed); From 
501675575e2d08ff895baf7537e8354bba24a206 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Fri, 20 Sep 2024 10:37:37 -0700 Subject: [PATCH 34/39] fixes --- install/requirements.txt | 2 +- runner/et.cmake | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/install/requirements.txt b/install/requirements.txt index 7bb3b74b5..1647ce00e 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -12,7 +12,7 @@ tiktoken # Miscellaneous snakeviz sentencepiece -numpy>=1.23.5,<2.0 +numpy<2.0 gguf lm-eval==0.4.2 blobfile diff --git a/runner/et.cmake b/runner/et.cmake index 27d799873..9024182a1 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -62,6 +62,7 @@ if(executorch_FOUND) set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch) set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack) + list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp) list(APPEND _common_include_directories ${XNNPACK_ROOT}/third-party/cpuinfo/include) From 4ce0de853225aa942f3f057f101d473938e0b2a6 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Fri, 20 Sep 2024 10:50:16 -0700 Subject: [PATCH 35/39] update --- runner/et.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runner/et.cmake b/runner/et.cmake index 9024182a1..0f8c8e908 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -62,7 +62,7 @@ if(executorch_FOUND) set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch) set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack) - list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp) + list(APPEND _srcs ${EXECUTORCH_SRC_ROOT}/extension/threadpool/cpuinfo_utils.cpp) list(APPEND _common_include_directories ${XNNPACK_ROOT}/third-party/cpuinfo/include) From db88dc84e5d9a3d2f7baf8d316dcbcd9f06f4d9f Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Mon, 23 Sep 2024 10:11:00 -0700 Subject: [PATCH 36/39] Add 
import for quantized decomposed ops Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: --- torchchat/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/torchchat/model.py b/torchchat/model.py index ea7710a27..81c06e495 100644 --- a/torchchat/model.py +++ b/torchchat/model.py @@ -932,6 +932,9 @@ def apply_rotary_emb(x: Tensor, freqs_cis: Tensor) -> Tensor: from executorch.extension.pybindings import portable_lib as exec_lib # ET changed the way it's loading the custom ops so it's not included in portable_lib but has to be loaded separately. + # For quantized_decomposed ops + from executorch.kernels import quantized # no-qa + # For llama::sdpa_with_kv_cache.out, preprocess ops from executorch.extension.llm.custom_ops import sdpa_with_kv_cache # no-qa class PTEModel(nn.Module): From 517ec519d348f22ac67356a3b32d8a3557cf1638 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 23 Sep 2024 10:40:02 -0700 Subject: [PATCH 37/39] remove whole archive from custom_op --- install/requirements.txt | 2 +- runner/et.cmake | 5 +++-- torchchat/utils/scripts/install_et.sh | 2 +- torchchat/utils/scripts/install_utils.sh | 3 ++- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/install/requirements.txt b/install/requirements.txt index 1647ce00e..bbb1d56d1 100644 --- a/install/requirements.txt +++ b/install/requirements.txt @@ -12,7 +12,7 @@ tiktoken # Miscellaneous snakeviz sentencepiece -numpy<2.0 +numpy < 2.0 gguf lm-eval==0.4.2 blobfile diff --git a/runner/et.cmake b/runner/et.cmake index 0f8c8e908..0fbac2fce 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -95,10 +95,13 @@ if(executorch_FOUND) XNNPACK pthreadpool cpuinfo + custom_ops ) target_link_options_shared_lib(optimized_native_cpu_ops_lib) target_link_options_shared_lib(quantized_ops_lib) target_link_options_shared_lib(xnnpack_backend) + target_link_options_shared_lib(custom_ops) + # Not clear why linking executorch as whole-archive outside 
android/apple is leading # to double registration. Most likely because of linkage issues. # Will figure this out later. Until then use this. @@ -106,8 +109,6 @@ if(executorch_FOUND) target_link_options_shared_lib(executorch) endif() - target_link_libraries(et_run PRIVATE - "$") # This one is needed for cpuinfo where it uses android specific log lib if(ANDROID) target_link_libraries(et_run PRIVATE log) diff --git a/torchchat/utils/scripts/install_et.sh b/torchchat/utils/scripts/install_et.sh index 22c3ac80a..1d8c6e2b2 100755 --- a/torchchat/utils/scripts/install_et.sh +++ b/torchchat/utils/scripts/install_et.sh @@ -17,7 +17,7 @@ ENABLE_ET_PYBIND="${1:-true}" pushd ${TORCHCHAT_ROOT} find_cmake_prefix_path -install_pip_dependencies clone_executorch install_executorch_libs $ENABLE_ET_PYBIND +install_executorch_python_libs $ENABLE_ET_PYBIND popd diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh index 06d978cfa..2da3d044c 100644 --- a/torchchat/utils/scripts/install_utils.sh +++ b/torchchat/utils/scripts/install_utils.sh @@ -148,7 +148,6 @@ install_executorch_cpp_libs() { -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=${EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR} \ -DEXECUTORCH_BUILD_KERNELS_CUSTOM=${EXECUTORCH_BUILD_KERNELS_CUSTOM_VAR} \ - -DEXECUTORCH_BUILD_XNNPACK=ON \ ${CROSS_COMPILE_ARGS} \ -S . 
-B ${CMAKE_OUT_DIR} -G Ninja cmake --build ${CMAKE_OUT_DIR} @@ -157,6 +156,8 @@ install_executorch_cpp_libs() { } install_executorch_libs() { + EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR=OFF + EXECUTORCH_BUILD_KERNELS_CUSTOM_VAR=OFF install_executorch_cpp_libs install_executorch_python_libs $1 } From e5a325ea635bc43a1757d370f31af0a73309addd Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 23 Sep 2024 10:52:16 -0700 Subject: [PATCH 38/39] add imported lib --- runner/et.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/runner/et.cmake b/runner/et.cmake index 0fbac2fce..2aa3efd6f 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -69,6 +69,8 @@ if(executorch_FOUND) list(APPEND _common_include_directories ${XNNPACK_ROOT}/third-party/pthreadpool/include) endif() + add_library(custom_ops STATIC IMPORTED) + set_property(TARGET custom_ops PROPERTY IMPORTED_LOCATION ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcustom_ops.a) target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ExecuTorch installation process would do this add_executable(et_run ${_srcs}) @@ -109,6 +111,8 @@ if(executorch_FOUND) target_link_options_shared_lib(executorch) endif() + # target_link_libraries(et_run PRIVATE + # "$") # This one is needed for cpuinfo where it uses android specific log lib if(ANDROID) target_link_libraries(et_run PRIVATE log) From c79397b8da6874e9601707c00c22d7119ff773b6 Mon Sep 17 00:00:00 2001 From: Scott Roy <161522778+metascroy@users.noreply.github.com> Date: Mon, 23 Sep 2024 10:52:32 -0700 Subject: [PATCH 39/39] add imported lib --- runner/et.cmake | 2 -- 1 file changed, 2 deletions(-) diff --git a/runner/et.cmake b/runner/et.cmake index 2aa3efd6f..99e67a025 100644 --- a/runner/et.cmake +++ b/runner/et.cmake @@ -111,8 +111,6 @@ if(executorch_FOUND) target_link_options_shared_lib(executorch) endif() - # target_link_libraries(et_run PRIVATE - # "$") # This one is needed for cpuinfo 
where it uses android specific log lib if(ANDROID) target_link_libraries(et_run PRIVATE log)