From 7d4686af79ab694b281d84c270d7a4a32247b606 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Fri, 13 Sep 2024 14:24:25 -0700
Subject: [PATCH 01/39] add new torchao experimental kernels to torchchat

---
 .gitignore                                    |  1 +
 install/.pins/torchao-experimental-pin.txt    |  1 +
 runner/aoti.cmake                             |  4 ++
 runner/et.cmake                               |  4 ++
 torchchat/utils/quantize.py                   | 47 +++++++++++++++++--
 torchchat/utils/scripts/build_native.sh       | 17 ++++++-
 .../scripts/build_torchao_experimental.sh     | 16 +++++++
 torchchat/utils/scripts/install_utils.sh      | 45 ++++++++++++++++++
 8 files changed, 129 insertions(+), 6 deletions(-)
 create mode 100644 install/.pins/torchao-experimental-pin.txt
 create mode 100644 torchchat/utils/scripts/build_torchao_experimental.sh

diff --git a/.gitignore b/.gitignore
index 3f25b76c0..ee856fcd2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,7 @@ __pycache__/
 # Build directories
 build/android/*
 et-build/*
+torchao-build/*
 runner-et/cmake-out/*
 runner-aoti/cmake-out/*
 cmake-out/
diff --git a/install/.pins/torchao-experimental-pin.txt b/install/.pins/torchao-experimental-pin.txt
new file mode 100644
index 000000000..9b101777d
--- /dev/null
+++ b/install/.pins/torchao-experimental-pin.txt
@@ -0,0 +1 @@
+3fa38aaf1276e36845a82fb399e5054718a441c4
diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index 156e9bcce..5449f2156 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -28,3 +28,7 @@ if(Torch_FOUND)
     target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m)
     set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17)
 endif()
+
+if (LINK_TORCHAO_CUSTOM_OPS)
+    target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
+endif()
diff --git a/runner/et.cmake b/runner/et.cmake
index 7fc16b1f2..b6eee8a2f 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -111,6 +111,10 @@ if(executorch_FOUND)
     target_link_libraries(et_run PRIVATE log)
   endif()
 
+  if(LINK_TORCHAO_CUSTOM_OPS)
+    target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}")
+  endif()
+
   # Adding target_link_options_shared_lib as commented out below leads to this:
   #
   # CMake Error at Utils.cmake:22 (target_link_options):
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py
index a0d9248a9..df8a39b04 100644
--- a/torchchat/utils/quantize.py
+++ b/torchchat/utils/quantize.py
@@ -96,10 +96,19 @@ def quantize_model(
                 precision = get_precision()
 
             try:
-                # Easier to ask forgiveness than permission
-                quant_handler = ao_quantizer_class_dict[quantizer](
-                    groupsize=q_kwargs["groupsize"], device=device, precision=precision
-                )
+                if quantizer == "linear:a8wxdq":
+                    quant_handler = ao_quantizer_class_dict[quantizer](
+                        device=device,
+                        precision=precision,
+                        bitwidth=q_kwargs.get("bitwidth", 4),
+                        groupsize=q_kwargs.get("groupsize", 128),
+                        has_weight_zeros=q_kwargs.get("has_weight_zeros", False),
+                    )
+                else:
+                    # Easier to ask forgiveness than permission
+                    quant_handler = ao_quantizer_class_dict[quantizer](
+                        groupsize=q_kwargs["groupsize"], device=device, precision=precision
+                    )
             except TypeError as e:
                 if "unexpected keyword argument 'device'" in str(e):
                     quant_handler = ao_quantizer_class_dict[quantizer](
@@ -861,3 +870,33 @@ def quantized_model(self) -> nn.Module:
     "linear:int4": Int4WeightOnlyQuantizer,
     "linear:a8w4dq": Int8DynActInt4WeightQuantizer,
 }
+
+try:
+    import importlib.util
+    import sys
+    import os
+    torchao_build_path = f"{os.getcwd()}/torchao-build"
+
+    # Try loading quantizer
+    torchao_experimental_quant_api_spec = importlib.util.spec_from_file_location(
+        "torchao_experimental_quant_api",
+        f"{torchao_build_path}/src/ao/torchao/experimental/quant_api.py",
+    )
+    torchao_experimental_quant_api = importlib.util.module_from_spec(torchao_experimental_quant_api_spec)
+    sys.modules["torchao_experimental_quant_api"] = torchao_experimental_quant_api
+    torchao_experimental_quant_api_spec.loader.exec_module(torchao_experimental_quant_api)
+    from torchao_experimental_quant_api import Int8DynActIntxWeightQuantizer
+    ao_quantizer_class_dict["linear:a8wxdq"] = Int8DynActIntxWeightQuantizer
+
+    # Try loading custom op
+    try:
+        import glob
+        libs = glob.glob(f"{torchao_build_path}/cmake-out/liblowbit_op_aten.*")
+        libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
+        torch.ops.load_library(libs[0])
+    except Exception as e:
+        print("Failed to torchao custom op library with error: ", e)
+        print("Slow fallback kernels will be used.")
+
+except Exception as e:
+    print(f"Failed to load torchao experimental a8wxdq quantizer with error: {e}")
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
index aacd97415..b75342dc2 100755
--- a/torchchat/utils/scripts/build_native.sh
+++ b/torchchat/utils/scripts/build_native.sh
@@ -25,6 +25,8 @@ if [ $# -eq 0 ]; then
     show_help
     exit 1
 fi
+
+LINK_TORCHAO=OFF
 while (( "$#" )); do
   case "$1" in
     -h|--help)
@@ -41,6 +43,11 @@ while (( "$#" )); do
       TARGET="et"
       shift
       ;;
+    link_torchao)
+      echo "Linking with torchao custom ops..."
+      LINK_TORCHAO=ON
+      shift
+      ;;
     *)
       echo "Invalid option: $1"
       show_help
@@ -72,14 +79,20 @@ if [[ "$TARGET" == "et" ]]; then
     install_pip_dependencies
     clone_executorch
     install_executorch_libs false
+
+    if [[ "$LINK_TORCHAO" == "ON" ]]; then
+      EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src"
+      EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a"
+      install_torchao_custom_executorch_ops
+    fi
 fi
 popd
 
 # CMake commands
 if [[ "$TARGET" == "et" ]]; then
-    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
+    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
 else
-    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja
+    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja
 fi
 cmake --build ./cmake-out --target "${TARGET}"_run
 
diff --git a/torchchat/utils/scripts/build_torchao_experimental.sh b/torchchat/utils/scripts/build_torchao_experimental.sh
new file mode 100644
index 000000000..1df3e80c6
--- /dev/null
+++ b/torchchat/utils/scripts/build_torchao_experimental.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+
+
+source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh"
+
+pushd ${TORCHCHAT_ROOT}
+find_cmake_prefix_path
+clone_torchao
+install_torchao_custom_aten_ops
+popd
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index f915402e7..c63234c0f 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -162,3 +162,48 @@ install_executorch_libs() {
 
   install_executorch_python_libs $1
 }
+
+clone_torchao() {
+  echo "Cloning torchao to ${TORCHCHAT_ROOT}/torchao-build/src"
+  rm -rf ${TORCHCHAT_ROOT}/torchao-build/src
+  mkdir -p ${TORCHCHAT_ROOT}/torchao-build/src
+  pushd ${TORCHCHAT_ROOT}/torchao-build/src
+  echo $pwd
+
+  cp -R /Users/scroy/fbsource/fbcode/pytorch/ao .
+  # git clone https://github.com/pytorch/ao.git
+  # cd ao
+  # git checkout $(cat ${TORCHCHAT_ROOT}/intstall/.pins/torchao-experimental-pin.txt)
+
+  popd
+}
+
+install_torchao_custom_aten_ops() {
+  echo "Building torchao custom ops for ATen"
+  pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op
+  export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao
+
+  CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out
+  cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \
+    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DPLATFORM="ATEN" \
+    -S . \
+    -B ${CMAKE_OUT_DIR} -G Ninja
+  cmake --build  ${CMAKE_OUT_DIR}
+}
+
+install_torchao_custom_executorch_ops() {
+  echo "Building torchao custom ops for ExecuTorch"
+  pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op
+  export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao
+
+  CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out"
+  cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \
+    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \
+    -DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \
+    -DPLATFORM="EXECUTORCH" \
+    -S . \
+    -B ${CMAKE_OUT_DIR} -G Ninja
+  cmake --build  ${CMAKE_OUT_DIR}
+}

From 8ae346eee8b6b478d8ea05d823a14d491e98ef5b Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Fri, 13 Sep 2024 14:53:36 -0700
Subject: [PATCH 02/39] add doc

---
 docs/quantization.md | 62 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/docs/quantization.md b/docs/quantization.md
index 1f619e58e..6245e8b6d 100644
--- a/docs/quantization.md
+++ b/docs/quantization.md
@@ -118,6 +118,68 @@ python3 torchchat.py export llama3 --quantize '{"embedding": {"bitwidth": 4, "gr
 python3 torchchat.py generate llama3 --pte-path llama3.pte  --prompt "Hello my name is"
 ```
 
+## Experimental TorchAO lowbit kernels
+
+### Use
+The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize.
+It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
+The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true).
+Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q40 quantization scheme.
+
+You should expect high performance on ARM CPU if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16.  With other platforms and argument choices, a slow fallback kernel will be used.  You will see warnings about this during quantization.
+
+### Setup
+To use a8wxdq, you must set up the torchao experimental kernels.  These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon.
+
+From the torchchat root directory, run
+```
+sh torchchat/utils/scripts/build_torchao_experimental.sh
+```
+
+This should take about 10 seconds to complete.  Once finished, you can use a8wxdq in torchchat.
+
+Note: if you want to use the new kernels in the AOTI and C++ runners, you must pass the flag link_torchao when running the scripts the build the runners.
+
+```
+sh torchchat/utils/scripts/build_native.sh aoti link_torchao
+```
+
+```
+sh torchchat/utils/scripts/build_native.sh et link_torchao
+```
+
+### Examples
+
+#### Eager mode
+```
+python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
+```
+
+#### torch.compile
+```
+python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
+```
+
+As with PyTorch in general, you can experiment with performance on a difference number of threads by defining OMP_NUM_THREADS.  For example,
+
+```
+OMP_NUM_THREADS=6 python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
+```
+
+#### AOTI
+```
+python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-dso llama3.so
+python3 torchchat.py generate llama3 --dso-path llama3_1.so --prompt "Hello my name is"
+```
+
+#### ExecuTorch
+```
+python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-pte llama3.pte
+```
+
+Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file.
+Also note that the ExecuTorch op that wraps the new torchao kernel is currently single threaded.
+
 ## Quantization Profiles
 
 Four [sample profiles](https://github.com/pytorch/torchchat/tree/main/torchchat/quant_config/) are included with the torchchat distribution: `cuda.json`, `desktop.json`, `mobile.json`, `pi5.json`

From f9bea2b0445fbd2fbd23ad2cca184c9380132144 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Sat, 14 Sep 2024 20:44:05 -0700
Subject: [PATCH 03/39] update torchao library name

---
 runner/aoti.cmake                        |  2 +-
 runner/et.cmake                          |  2 +-
 torchchat/utils/quantize.py              |  2 +-
 torchchat/utils/scripts/install_utils.sh | 19 ++++++++++---------
 4 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index 5449f2156..ef7275ede 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -30,5 +30,5 @@ if(Torch_FOUND)
 endif()
 
 if (LINK_TORCHAO_CUSTOM_OPS)
-    target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
 endif()
diff --git a/runner/et.cmake b/runner/et.cmake
index b6eee8a2f..5b5d2629d 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -112,7 +112,7 @@ if(executorch_FOUND)
   endif()
 
   if(LINK_TORCHAO_CUSTOM_OPS)
-    target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/liblowbit_op_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
   endif()
 
   # Adding target_link_options_shared_lib as commented out below leads to this:
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py
index df8a39b04..8cfe536e2 100644
--- a/torchchat/utils/quantize.py
+++ b/torchchat/utils/quantize.py
@@ -891,7 +891,7 @@ def quantized_model(self) -> nn.Module:
     # Try loading custom op
     try:
         import glob
-        libs = glob.glob(f"{torchao_build_path}/cmake-out/liblowbit_op_aten.*")
+        libs = glob.glob(f"{torchao_build_path}/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten.*")
         libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
         torch.ops.load_library(libs[0])
     except Exception as e:
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index c63234c0f..37a2f6a71 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -180,30 +180,31 @@ clone_torchao() {
 
 install_torchao_custom_aten_ops() {
   echo "Building torchao custom ops for ATen"
-  pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op
-  export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao
+  pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental
 
   CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out
-  cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \
-    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-    -DPLATFORM="ATEN" \
+  cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DTORCHAO_OP_TARGET="ATEN" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build  ${CMAKE_OUT_DIR}
+
+  popd
 }
 
 install_torchao_custom_executorch_ops() {
   echo "Building torchao custom ops for ExecuTorch"
-  pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental/kernels/cpu/linear/examples/torch_custom_op
+  pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental
   export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao
 
   CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out"
-  cmake -DTORCHAO_INCLUDE_DIRS=${TORCHAO_INCLUDE_DIRS} \
-    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+  cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
     -DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \
     -DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \
-    -DPLATFORM="EXECUTORCH" \
+    -DTORCHAO_OP_TARGET="EXECUTORCH" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build  ${CMAKE_OUT_DIR}
+
+  popd
 }

From c15f06b21f766e9146fcbe080f06140ded3e66cd Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Sat, 14 Sep 2024 21:02:43 -0700
Subject: [PATCH 04/39] typo

---
 runner/et.cmake                          | 2 +-
 torchchat/utils/scripts/install_utils.sh | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/runner/et.cmake b/runner/et.cmake
index 5b5d2629d..921dce93c 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -112,7 +112,7 @@ if(executorch_FOUND)
   endif()
 
   if(LINK_TORCHAO_CUSTOM_OPS)
-    target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}")
   endif()
 
   # Adding target_link_options_shared_lib as commented out below leads to this:
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 37a2f6a71..3abe93ceb 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -195,12 +195,11 @@ install_torchao_custom_aten_ops() {
 install_torchao_custom_executorch_ops() {
   echo "Building torchao custom ops for ExecuTorch"
   pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental
-  export TORCHAO_INCLUDE_DIRS=${TORCHCHAT_ROOT}/torchao-build/src/ao
 
   CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out"
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-    -DEXECUTORCH_INCLUDE_DIRS=${EXECUTORCH_INCLUDE_DIRS} \
-    -DEXECUTORCH_LIBRARIES=${EXECUTORCH_LIBRARIES} \
+    -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
+    -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
     -DTORCHAO_OP_TARGET="EXECUTORCH" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja

From f7f8bf8752f9552d602d738f1ed99e02a64cb9f9 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Mon, 16 Sep 2024 09:29:30 -0700
Subject: [PATCH 05/39] add multithreading to ET runner

---
 torchchat/utils/scripts/build_native.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
index b75342dc2..eecd07884 100755
--- a/torchchat/utils/scripts/build_native.sh
+++ b/torchchat/utils/scripts/build_native.sh
@@ -77,7 +77,7 @@ git submodule sync
 if [[ "$TARGET" == "et" ]]; then
     find_cmake_prefix_path
     install_pip_dependencies
-    clone_executorch
+    # clone_executorch
     install_executorch_libs false
 
     if [[ "$LINK_TORCHAO" == "ON" ]]; then
@@ -90,7 +90,7 @@ popd
 
 # CMake commands
 if [[ "$TARGET" == "et" ]]; then
-    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
+    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
 else
     cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja
 fi

From c677aa5e7677b7590c7e09dea299822d3fcf8a46 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Mon, 16 Sep 2024 17:36:28 -0700
Subject: [PATCH 06/39] update lib names

---
 runner/aoti.cmake           | 2 +-
 runner/et.cmake             | 2 +-
 torchchat/utils/quantize.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index ef7275ede..35e4c1329 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -30,5 +30,5 @@ if(Torch_FOUND)
 endif()
 
 if (LINK_TORCHAO_CUSTOM_OPS)
-    target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}")
 endif()
diff --git a/runner/et.cmake b/runner/et.cmake
index 921dce93c..108102b86 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -112,7 +112,7 @@ if(executorch_FOUND)
   endif()
 
   if(LINK_TORCHAO_CUSTOM_OPS)
-    target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_executorch${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}")
   endif()
 
   # Adding target_link_options_shared_lib as commented out below leads to this:
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py
index 8cfe536e2..041f074c2 100644
--- a/torchchat/utils/quantize.py
+++ b/torchchat/utils/quantize.py
@@ -891,7 +891,7 @@ def quantized_model(self) -> nn.Module:
     # Try loading custom op
     try:
         import glob
-        libs = glob.glob(f"{torchao_build_path}/cmake-out/ops/linear/linear_a8wxdq_op/liblinear_a8wxdq_aten.*")
+        libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*")
         libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
         torch.ops.load_library(libs[0])
     except Exception as e:

From 53c24c317e3bea260cbf32c1496637e699ca0a59 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Mon, 16 Sep 2024 17:50:22 -0700
Subject: [PATCH 07/39] update lib name

---
 torchchat/utils/scripts/install_utils.sh | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 3abe93ceb..0102d7707 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -184,10 +184,11 @@ install_torchao_custom_aten_ops() {
 
   CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
     -DTORCHAO_OP_TARGET="ATEN" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja
-  cmake --build  ${CMAKE_OUT_DIR}
+  cmake --build  ${CMAKE_OUT_DIR} --target install --config Release
 
   popd
 }
@@ -198,12 +199,13 @@ install_torchao_custom_executorch_ops() {
 
   CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out"
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
+    -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
     -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
     -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
     -DTORCHAO_OP_TARGET="EXECUTORCH" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja
-  cmake --build  ${CMAKE_OUT_DIR}
+  cmake --build  ${CMAKE_OUT_DIR} --target install --config Release
 
   popd
 }

From 504375e06e352136c7549d66a97189afd5d23610 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 18 Sep 2024 13:47:07 -0700
Subject: [PATCH 08/39] update torchao ExecuTorch op lib to static

---
 install/.pins/et-pin.txt                 |  2 +-
 runner/et.cmake                          |  9 +++++++--
 runner/run.cpp                           | 17 +++++++++--------
 torchchat/export.py                      |  3 +--
 torchchat/utils/scripts/build_native.sh  |  6 +++---
 torchchat/utils/scripts/install_utils.sh |  4 +++-
 6 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt
index a6f1373dd..0a15fd2b5 100644
--- a/install/.pins/et-pin.txt
+++ b/install/.pins/et-pin.txt
@@ -1 +1 @@
-91298923a0076c1b41059efb6dad2876426e4b03
+58700faa262ddf45b223353c120ffaf6b2003711
diff --git a/runner/et.cmake b/runner/et.cmake
index 108102b86..7715ce656 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -62,7 +62,6 @@ if(executorch_FOUND)
 
     set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch)
     set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack)
-    list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
     list(APPEND _common_include_directories
          ${XNNPACK_ROOT}/third-party/cpuinfo/include)
 
@@ -80,7 +79,9 @@ if(executorch_FOUND)
     et_run PRIVATE
     executorch
     extension_module
+    extension_tensor
     extension_data_loader
+    extension_threadpool
     optimized_kernels
     quantized_kernels
     portable_kernels
@@ -112,7 +113,11 @@ if(executorch_FOUND)
   endif()
 
   if(LINK_TORCHAO_CUSTOM_OPS)
-    target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}")
+    target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH.a>")
+    target_link_libraries(et_run PRIVATE
+      "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a"
+      "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a"
+    )
   endif()
 
   # Adding target_link_options_shared_lib as commented out below leads to this:
diff --git a/runner/run.cpp b/runner/run.cpp
index 999ad2fcc..455f9e076 100644
--- a/runner/run.cpp
+++ b/runner/run.cpp
@@ -39,19 +39,20 @@ torch::Device aoti_device(torch::kCPU);
 
 #else // __ET_MODEL__
 #include <executorch/extension/module/module.h>
-#include <executorch/extension/runner_util/managed_tensor.h>
+#include <executorch/extension/tensor/tensor_ptr.h>
 #include <executorch/runtime/core/evalue.h>
 #include <executorch/runtime/core/exec_aten/exec_aten.h>
 #include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>
 
 #if defined(ET_USE_ADAPTIVE_THREADS)
-#include <executorch/backends/xnnpack/threadpool/cpuinfo_utils.h>
-#include <executorch/backends/xnnpack/threadpool/threadpool.h>
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <executorch/extension/threadpool/threadpool.h>
 #endif
 
 using exec_aten::ScalarType;
 using torch::executor::EValue;
-using torch::executor::ManagedTensor;
+using executorch::extension::TensorPtr;
+using executorch::extension::make_tensor_ptr;
 using torch::executor::Module;
 using torch::executor::Result;
 #endif
@@ -212,11 +213,11 @@ float* forward(Transformer* transformer, int token, int pos) {
                              .to(torch::kCPU);
   auto logits = result[0].data_ptr();
 #else // __ET_MODEL__
-  ManagedTensor pos_managed(pos_buffer, {1}, ScalarType::Long);
-  ManagedTensor tokens_managed(token_buffer, {1, 1}, ScalarType::Long);
+  TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer); //(pos_buffer, {1}, ScalarType::Long);
+  TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer); //(token_buffer, {1, 1}, ScalarType::Long);
   std::vector<EValue> inputs;
-  auto tmp1 = EValue(tokens_managed.get_aliasing_tensor());
-  auto tmp2 = EValue(pos_managed.get_aliasing_tensor());
+  auto tmp1 = EValue(tokens_managed); //.get_aliasing_tensor());
+  auto tmp2 = EValue(pos_managed); //.get_aliasing_tensor());
 
   inputs.push_back(tmp1);
   inputs.push_back(tmp2);
diff --git a/torchchat/export.py b/torchchat/export.py
index affb8b871..b28e8023f 100644
--- a/torchchat/export.py
+++ b/torchchat/export.py
@@ -194,7 +194,7 @@ def forward(self, x, freqs_cis, mask, input_pos=None):
             return self.wo(output)
 
     def replace_attention_with_custom_sdpa_attention(module: nn.Module):
-        from executorch.examples.models.llama2.custom_ops import (  # noqa
+        from executorch.extension.llm.custom_ops import (  # noqa
             sdpa_with_kv_cache,
         )
 
@@ -304,7 +304,6 @@ def export_for_et(model, device, output_path) -> str:
         edge_manager = edge_manager.to_backend(XnnpackDynamicallyQuantizedPartitioner())
         export_program = edge_manager.to_executorch(
             ExecutorchBackendConfig(
-                extract_constant_segment=True,
                 extract_delegate_segments=True,
                 passes=[
                     QuantFusionPass(),
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
index eecd07884..d422f6ae0 100755
--- a/torchchat/utils/scripts/build_native.sh
+++ b/torchchat/utils/scripts/build_native.sh
@@ -77,12 +77,12 @@ git submodule sync
 if [[ "$TARGET" == "et" ]]; then
     find_cmake_prefix_path
     install_pip_dependencies
-    # clone_executorch
+    clone_executorch
     install_executorch_libs false
 
     if [[ "$LINK_TORCHAO" == "ON" ]]; then
-      EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src"
-      EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a"
+      EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src"
+      EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
       install_torchao_custom_executorch_ops
     fi
 fi
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 0102d7707..9ccf6a924 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -185,6 +185,7 @@ install_torchao_custom_aten_ops() {
   CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
     -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
+    -DCMAKE_BUILD_TYPE="Release" \
     -DTORCHAO_OP_TARGET="ATEN" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja
@@ -200,9 +201,10 @@ install_torchao_custom_executorch_ops() {
   CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out"
   cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
     -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
+    -DCMAKE_BUILD_TYPE="Release" \
+    -DTORCHAO_OP_TARGET="EXECUTORCH" \
     -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
     -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
-    -DTORCHAO_OP_TARGET="EXECUTORCH" \
     -S . \
     -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build  ${CMAKE_OUT_DIR} --target install --config Release

From d5dded2261b49ccd8e1e934ebd1c51fc85346c4f Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 18 Sep 2024 13:52:04 -0700
Subject: [PATCH 09/39] remove old comment

---
 runner/run.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/runner/run.cpp b/runner/run.cpp
index 455f9e076..99eb7bfb9 100644
--- a/runner/run.cpp
+++ b/runner/run.cpp
@@ -213,11 +213,11 @@ float* forward(Transformer* transformer, int token, int pos) {
                              .to(torch::kCPU);
   auto logits = result[0].data_ptr();
 #else // __ET_MODEL__
-  TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer); //(pos_buffer, {1}, ScalarType::Long);
-  TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer); //(token_buffer, {1, 1}, ScalarType::Long);
+  TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer);
+  TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer);
   std::vector<EValue> inputs;
-  auto tmp1 = EValue(tokens_managed); //.get_aliasing_tensor());
-  auto tmp2 = EValue(pos_managed); //.get_aliasing_tensor());
+  auto tmp1 = EValue(tokens_managed);
+  auto tmp2 = EValue(pos_managed);
 
   inputs.push_back(tmp1);
   inputs.push_back(tmp2);

From 33450893d1629f644a6ba084e7f870ebc2485f01 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 18 Sep 2024 14:02:59 -0700
Subject: [PATCH 10/39] add dylib for poor perf repro testing

---
 runner/et.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/runner/et.cmake b/runner/et.cmake
index 7715ce656..12c7fca02 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -113,6 +113,7 @@ if(executorch_FOUND)
   endif()
 
   if(LINK_TORCHAO_CUSTOM_OPS)
+    # target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}")
     target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH.a>")
     target_link_libraries(et_run PRIVATE
       "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a"

From bf1e72751ee3689796880c911b9737c72b283fd7 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 18 Sep 2024 16:07:51 -0700
Subject: [PATCH 11/39] change hard-coded /Users/scroy to ${HOME}

---
 torchchat/utils/scripts/install_utils.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 9ccf6a924..e4bbaadd3 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -170,7 +170,7 @@ clone_torchao() {
   pushd ${TORCHCHAT_ROOT}/torchao-build/src
   echo $pwd
 
-  cp -R /Users/scroy/fbsource/fbcode/pytorch/ao .
+  cp -R ${HOME}/fbsource/fbcode/pytorch/ao .
   # git clone https://github.com/pytorch/ao.git
   # cd ao
   # git checkout $(cat ${TORCHCHAT_ROOT}/intstall/.pins/torchao-experimental-pin.txt)

From 0d5da6785d7b158f4078c4e41cc2bc742e184979 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 18 Sep 2024 16:21:30 -0700
Subject: [PATCH 12/39] remove single-threaded reference in docs

---
 docs/quantization.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/docs/quantization.md b/docs/quantization.md
index 6245e8b6d..aea8a8dc6 100644
--- a/docs/quantization.md
+++ b/docs/quantization.md
@@ -178,7 +178,6 @@ python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"lin
 ```
 
 Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file.
-Also note that the ExecuTorch op that wraps the new torchao kernel is currently single threaded.
 
 ## Quantization Profiles
 

From 8ee9d0e85b7be6fdc34307a9e695b359cb52cbad Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 09:23:46 -0700
Subject: [PATCH 13/39] add github workflow for testing

---
 .github/workflows/pull.yml | 100 +++++++++++++++++++++++++++++++++++++
 1 file changed, 100 insertions(+)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index f42a20e22..d7d1f5184 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -1035,3 +1035,103 @@ jobs:
           git submodule update --init
           ./runner/build_android.sh
           echo "Tests complete."
+
+  test-torchao-experimental:
+    strategy:
+      matrix:
+        runner: [macos-14-xlarge]
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout repo
+        uses: actions/checkout@v3
+        with:
+          submodules: true
+      - name: Setup Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.10.11
+      - name: Setup Xcode
+        if: runner.os == 'macOS'
+        uses: maxim-lobanov/setup-xcode@v1
+        with:
+          xcode-version: '15.3'
+      - name: Print machine info
+        run: |
+          uname -a
+          if [ $(uname -s) == Darwin ]; then
+            sysctl machdep.cpu.brand_string
+            sysctl machdep.cpu.core_count
+          fi
+      - name: Install torchchat
+        run: |
+          echo "Intalling pip3 packages"
+          ./install/install_requirements.sh
+          pip3 list
+          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
+      - name: Install torchao-experimental
+        id: install-torchao-experimental
+        run: |
+          bash torchchat/utils/scripts/build_torchao_experimental.sh
+      - name: Set git shas
+        id: setup-hash
+        run: |
+          export TORCHCHAT_ROOT=${PWD}
+          echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
+      - name: Load or install ET
+        id: install-et
+        uses: actions/cache@v3
+        env:
+          cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}
+        with:
+          path: ./et-build
+          key: ${{env.cache-key}}
+          restore-keys: |
+            ${{env.cache-key}}
+      - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
+        continue-on-error: true
+        run: |
+          echo "Installing ExecuTorch"
+          bash torchchat/utils/scripts/build_native.sh et link_torchao
+      - name: Install ET pip
+        run: |
+          echo "ET build directory"
+          ls et-build | cat
+
+          pushd et-build/src/executorch
+          if [ $(git rev-parse HEAD) != ${{env.et-git-hash}} ]; then
+            echo "Mismatched hash.  Make sure branch install_et.sh matches branch from Github cache."
+            echo "On commit $(git rev-parse HEAD)"
+            echo "Expected commit ${{env.et-git-hash}}"
+            exit 1
+          fi
+          pip install .
+          popd
+      - name: Install runner AOTI
+        id: install-runner-aoti
+        run: |
+          bash torchchat/utils/scripts/build_native.sh aoti link_torchao
+      - name: Run inference
+        run: |
+          python torchchat.py download stories110M
+          wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
+
+          export PRMT="Once upon a time in a land far away"
+
+          echo "Generate eager"
+          python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}"  --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
+
+          echo "Generate compile"
+          python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}"  --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
+
+          echo "Export and run ET (C++ runner)"
+          python torchchat.py export stories110M --output-pte-path ./model.pte --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
+          ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
+
+          echo "Export and run AOTI (C++ runner)"
+          python torchchat.py export stories110M --output-dso-path ./model.so --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
+          ./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}"
+
+          echo "Generate AOTI"
+          python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}"
+
+          echo "Tests complete."

From 3bd1389a7e6cd2761a4fae7f11768ee4baecd816 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 09:52:31 -0700
Subject: [PATCH 14/39] fix torchchat-root in install script

---
 install/requirements.txt                 |  2 +-
 torchchat/utils/scripts/build_native.sh  | 10 +---------
 torchchat/utils/scripts/install_utils.sh |  4 ++--
 3 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/install/requirements.txt b/install/requirements.txt
index bbb1d56d1..bc4592d44 100644
--- a/install/requirements.txt
+++ b/install/requirements.txt
@@ -12,7 +12,7 @@ tiktoken
 # Miscellaneous
 snakeviz
 sentencepiece
-numpy < 2.0
+numpy==1.23.5
 gguf
 lm-eval==0.4.2
 blobfile
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
index d422f6ae0..85c3cd4c4 100755
--- a/torchchat/utils/scripts/build_native.sh
+++ b/torchchat/utils/scripts/build_native.sh
@@ -56,15 +56,7 @@ while (( "$#" )); do
   esac
 done
 
-if [ -z "${TORCHCHAT_ROOT}" ]; then
-    # Get the absolute path of the current script
-    SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
-    # Get the absolute path of the parent directory
-    TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")"
-    source "$TORCHCHAT_ROOT/scripts/install_utils.sh"
-else
-    source "$TORCHCHAT_ROOT/torchchat/utils/scripts/install_utils.sh"
-fi
+source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh"
 
 if [ -z "${ET_BUILD_DIR}" ]; then
     ET_BUILD_DIR="et-build"
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index e4bbaadd3..e6f8acde7 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -8,8 +8,8 @@
 set -ex pipefail
 
 if [ -z "$TORCHCHAT_ROOT" ]; then
-  echo "Defaulting TORCHCHAT_ROOT to $PWD since it is unset."
-  TORCHCHAT_ROOT=$PWD
+  TORCHCHAT_ROOT="$(dirname "${BASH_SOURCE[0]}")/../../.."
+  echo "Defaulting TORCHCHAT_ROOT to $TORCHCHAT_ROOT since it is unset."
 fi
 
 install_pip_dependencies() {

From 2241286329f9c19fed732700abb575940e9bac34 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 10:25:37 -0700
Subject: [PATCH 15/39] bug fixes: change numpy pin, update ET custom_ops import path

---
 install/requirements.txt | 2 +-
 torchchat/model.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/install/requirements.txt b/install/requirements.txt
index bc4592d44..935fa182b 100644
--- a/install/requirements.txt
+++ b/install/requirements.txt
@@ -12,7 +12,7 @@ tiktoken
 # Miscellaneous
 snakeviz
 sentencepiece
-numpy==1.23.5
+numpy==1.21.3
 gguf
 lm-eval==0.4.2
 blobfile
diff --git a/torchchat/model.py b/torchchat/model.py
index 79bd1f188..ea7710a27 100644
--- a/torchchat/model.py
+++ b/torchchat/model.py
@@ -932,7 +932,7 @@ def apply_rotary_emb(x: Tensor, freqs_cis: Tensor) -> Tensor:
     from executorch.extension.pybindings import portable_lib as exec_lib
 
     # ET changed the way it's loading the custom ops so it's not included in portable_lib but has to be loaded separately.
-    from executorch.examples.models.llama2.custom_ops import sdpa_with_kv_cache  # no-qa
+    from executorch.extension.llm.custom_ops import sdpa_with_kv_cache  # no-qa
 
     class PTEModel(nn.Module):
         def __init__(self, config, path) -> None:

From 1803d233e459934c03e14821339ecb774e992e8f Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 10:49:17 -0700
Subject: [PATCH 16/39] ci fixes

---
 install/requirements.txt                 | 2 +-
 torchchat/utils/scripts/install_utils.sh | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/install/requirements.txt b/install/requirements.txt
index 935fa182b..7bb3b74b5 100644
--- a/install/requirements.txt
+++ b/install/requirements.txt
@@ -12,7 +12,7 @@ tiktoken
 # Miscellaneous
 snakeviz
 sentencepiece
-numpy==1.21.3
+numpy>=1.23.5,<2.0
 gguf
 lm-eval==0.4.2
 blobfile
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index e6f8acde7..83a52446a 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -8,7 +8,9 @@
 set -ex pipefail
 
 if [ -z "$TORCHCHAT_ROOT" ]; then
-  TORCHCHAT_ROOT="$(dirname "${BASH_SOURCE[0]}")/../../.."
+  # Get the absolute path of the current script
+  SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
+  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")/../../.."
   echo "Defaulting TORCHCHAT_ROOT to $TORCHCHAT_ROOT since it is unset."
 fi
 

From c5173cd66e7ff8b675e8a44db7b1483c7ec254d9 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 11:00:43 -0700
Subject: [PATCH 17/39] fix default TORCHCHAT_ROOT path in install_utils.sh

---
 torchchat/utils/scripts/install_utils.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 83a52446a..50e498ae8 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -10,7 +10,7 @@ set -ex pipefail
 if [ -z "$TORCHCHAT_ROOT" ]; then
   # Get the absolute path of the current script
   SCRIPT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
-  TORCHCHAT_ROOT="$(dirname "$SCRIPT_PATH")/../../.."
+  TORCHCHAT_ROOT="$SCRIPT_PATH/../../.."
   echo "Defaulting TORCHCHAT_ROOT to $TORCHCHAT_ROOT since it is unset."
 fi
 

From fb96a394220a57adfbc0c18b301ac5b489cff554 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 11:56:08 -0700
Subject: [PATCH 18/39] ci fixes

---
 .github/workflows/pull.yml | 31 ++++++-------------------------
 runner/build_android.sh    |  1 +
 2 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index d7d1f5184..93486d412 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -452,7 +452,6 @@ jobs:
           echo "Intalling pip3 packages"
           ./install/install_requirements.sh
 
-          export TORCHCHAT_ROOT=$PWD
           ./torchchat/utils/scripts/install_et.sh
 
           pip3 list
@@ -621,6 +620,9 @@ jobs:
           python torchchat.py remove stories15m
 
   test-mps:
+    uses: actions/setup-python@v2
+      with:
+        python-version: 3.10.11
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable  # neeps MPS, was macos-m1-stable
@@ -733,6 +735,9 @@ jobs:
 
           echo "Tests complete."
   test-mps-dtype:
+    uses: actions/setup-python@v2
+      with:
+        python-version: 3.10.11
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable  # needs MPS, was macos-m1-stable
@@ -915,30 +920,6 @@ jobs:
         run: |
           echo "Installing ExecuTorch"
           bash torchchat/utils/scripts/build_native.sh et
-      - name: Install ET pip
-        run: |
-          echo "ET build directory"
-          ls et-build | cat
-
-          pushd et-build/src/executorch
-          if [ $(git rev-parse HEAD) != ${{env.et-git-hash}} ]; then
-            echo "Mismatched hash.  Make sure branch install_et.sh matches branch from Github cache."
-            echo "On commit $(git rev-parse HEAD)"
-            echo "Expected commit ${{env.et-git-hash}}"
-            exit 1
-          fi
-          pip install .
-          popd
-      - name: Install runner
-        run: |
-          # Pull submodules (re2, abseil) for Tiktoken
-          git submodule sync
-          git submodule update --init
-
-          export TORCHCHAT_ROOT=${PWD}
-          cmake -S . -B ./cmake-out -G Ninja
-          cmake --build ./cmake-out --target et_run
-
       - name: Run inference
         run: |
           python torchchat.py download stories15M
diff --git a/runner/build_android.sh b/runner/build_android.sh
index c32185957..0d1d0201b 100755
--- a/runner/build_android.sh
+++ b/runner/build_android.sh
@@ -22,6 +22,7 @@ fi
 export ET_BUILD_DIR="et-build-android"
 export CMAKE_OUT_DIR="cmake-out-android"
 export EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT="OFF"
+export EXECUTORCH_BUILD_EXTENSION_TENSOR="ON"
 export EXECUTORCH_BUILD_KERNELS_CUSTOM="ON"
 export CMAKE_OUT_DIR="cmake-out-android"
 # export DCMAKE_INSTALL_PREFIX=cmake-out-android

From 4eb7bdae30b3621338c64888dabd6feca4017829 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 12:01:20 -0700
Subject: [PATCH 19/39] fix ci

---
 .github/workflows/pull.yml | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 93486d412..f71b668b6 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -620,12 +620,10 @@ jobs:
           python torchchat.py remove stories15m
 
   test-mps:
-    uses: actions/setup-python@v2
-      with:
-        python-version: 3.10.11
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable  # neeps MPS, was macos-m1-stable
+      python-version: 3.10.11
       script: |
         set -x
         # NS/MC: Remove previous installation of torch and torchao first
@@ -735,12 +733,10 @@ jobs:
 
           echo "Tests complete."
   test-mps-dtype:
-    uses: actions/setup-python@v2
-      with:
-        python-version: 3.10.11
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable  # needs MPS, was macos-m1-stable
+      python-version: 3.10.11
       script: |
         set -x
         # NS/MC: Remove previous installation of torch and torchao first

From f569c4e27d2db9274be19cede9c5cbc8002cde43 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 12:37:24 -0700
Subject: [PATCH 20/39] ci fixes

---
 runner/build_android.sh                  |  3 ---
 torchchat/utils/scripts/install_utils.sh | 11 +++--------
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/runner/build_android.sh b/runner/build_android.sh
index 0d1d0201b..fab222a52 100755
--- a/runner/build_android.sh
+++ b/runner/build_android.sh
@@ -22,11 +22,8 @@ fi
 export ET_BUILD_DIR="et-build-android"
 export CMAKE_OUT_DIR="cmake-out-android"
 export EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT="OFF"
-export EXECUTORCH_BUILD_EXTENSION_TENSOR="ON"
 export EXECUTORCH_BUILD_KERNELS_CUSTOM="ON"
 export CMAKE_OUT_DIR="cmake-out-android"
-# export DCMAKE_INSTALL_PREFIX=cmake-out-android
-#
 
 build_runner_et() {
   rm -rf cmake-out-android
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 50e498ae8..29be634f4 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -104,7 +104,8 @@ COMMON_CMAKE_ARGS="\
     -DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \
     -DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \
     -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \
-    -DEXECUTORCH_BUILD_XNNPACK=ON"
+    -DEXECUTORCH_BUILD_XNNPACK=ON \
+    -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
 
 install_executorch() {
   # AOT lib has to be build for model export
@@ -155,13 +156,7 @@ install_executorch() {
 }
 
 install_executorch_libs() {
-  # Install executorch python and C++ libs
-  export CMAKE_ARGS="\
-    ${COMMON_CMAKE_ARGS} \
-    -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-    -DCMAKE_INSTALL_PREFIX=${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install"
-  export CMAKE_BUILD_ARGS="--target install"
-
+  install_executorch
   install_executorch_python_libs $1
 }
 

From d7fefb91321151e1e8b077d441b69efc37adeee5 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 12:47:45 -0700
Subject: [PATCH 21/39] ci update

---
 .github/workflows/pull.yml | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index f71b668b6..38de14d25 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -916,6 +916,9 @@ jobs:
         run: |
           echo "Installing ExecuTorch"
           bash torchchat/utils/scripts/build_native.sh et
+      - name: Install runner
+        run: |
+          bash torchchat/utils/scripts/build_native.sh et
       - name: Run inference
         run: |
           python torchchat.py download stories15M
@@ -1069,20 +1072,9 @@ jobs:
         run: |
           echo "Installing ExecuTorch"
           bash torchchat/utils/scripts/build_native.sh et link_torchao
-      - name: Install ET pip
+      - name: Install runner
         run: |
-          echo "ET build directory"
-          ls et-build | cat
-
-          pushd et-build/src/executorch
-          if [ $(git rev-parse HEAD) != ${{env.et-git-hash}} ]; then
-            echo "Mismatched hash.  Make sure branch install_et.sh matches branch from Github cache."
-            echo "On commit $(git rev-parse HEAD)"
-            echo "Expected commit ${{env.et-git-hash}}"
-            exit 1
-          fi
-          pip install .
-          popd
+          bash torchchat/utils/scripts/build_native.sh et link_torchao
       - name: Install runner AOTI
         id: install-runner-aoti
         run: |

From 0d5a97bfe5a6ecf82a31b982eeba20527e760556 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 13:00:16 -0700
Subject: [PATCH 22/39] ci fixes

---
 .github/workflows/pull.yml              |  6 ++++--
 torchchat/utils/scripts/build_native.sh | 14 +++++++++++---
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 38de14d25..169aa6ba5 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -918,7 +918,8 @@ jobs:
           bash torchchat/utils/scripts/build_native.sh et
       - name: Install runner
         run: |
-          bash torchchat/utils/scripts/build_native.sh et
+          echo "Installing runner"
+          bash torchchat/utils/scripts/build_native.sh et skip_et_install
       - name: Run inference
         run: |
           python torchchat.py download stories15M
@@ -1074,7 +1075,8 @@ jobs:
           bash torchchat/utils/scripts/build_native.sh et link_torchao
       - name: Install runner
         run: |
-          bash torchchat/utils/scripts/build_native.sh et link_torchao
+          echo "Installing runner"
+          bash torchchat/utils/scripts/build_native.sh et skip_et_install link_torchao
       - name: Install runner AOTI
         id: install-runner-aoti
         run: |
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
index 85c3cd4c4..48396ba80 100755
--- a/torchchat/utils/scripts/build_native.sh
+++ b/torchchat/utils/scripts/build_native.sh
@@ -27,6 +27,7 @@ if [ $# -eq 0 ]; then
 fi
 
 LINK_TORCHAO=OFF
+SKIP_ET_INSTALL=OFF
 while (( "$#" )); do
   case "$1" in
     -h|--help)
@@ -48,6 +49,11 @@ while (( "$#" )); do
       LINK_TORCHAO=ON
       shift
       ;;
+    skip_et_install)
+      echo "Skipping ET install..."
+      SKIP_ET_INSTALL=ON
+      shift
+      ;;
     *)
       echo "Invalid option: $1"
       show_help
@@ -68,9 +74,11 @@ git submodule update --init
 git submodule sync
 if [[ "$TARGET" == "et" ]]; then
     find_cmake_prefix_path
-    install_pip_dependencies
-    clone_executorch
-    install_executorch_libs false
+    if [[ "$SKIP_ET_INSTALL" == "OFF" ]]; then
+      install_pip_dependencies
+      clone_executorch
+      install_executorch_libs false
+    fi
 
     if [[ "$LINK_TORCHAO" == "ON" ]]; then
       EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src"

From a3197091e610ff4284676b8acb0e37097262ba75 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 13:10:26 -0700
Subject: [PATCH 23/39] ci fixes

---
 .github/workflows/pull.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 169aa6ba5..438b4cdd7 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -916,6 +916,11 @@ jobs:
         run: |
           echo "Installing ExecuTorch"
           bash torchchat/utils/scripts/build_native.sh et
+      - name: Install ExecuTorch python
+        run: |
+          echo "Install ExecuTorch python"
+          source "torchchat/utils/scripts/install_utils.sh"
+          install_executorch_python_libs
       - name: Install runner
         run: |
           echo "Installing runner"

From e5c671d338644f959c73e50ff588aafee0c963ce Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 13:47:54 -0700
Subject: [PATCH 24/39] ci fixes

---
 .github/workflows/pull.yml               |  9 ++++---
 runner/build_android.sh                  |  2 +-
 torchchat/utils/scripts/build_native.sh  | 31 ++++++++++++------------
 torchchat/utils/scripts/install_utils.sh |  4 +--
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 438b4cdd7..40b567e7b 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -915,16 +915,17 @@ jobs:
         continue-on-error: true
         run: |
           echo "Installing ExecuTorch"
-          bash torchchat/utils/scripts/build_native.sh et
+          bash torchchat/utils/scripts/install_et.sh
       - name: Install ExecuTorch python
         run: |
           echo "Install ExecuTorch python"
+          export TORCHCHAT_ROOT=$PWD
           source "torchchat/utils/scripts/install_utils.sh"
           install_executorch_python_libs
       - name: Install runner
         run: |
           echo "Installing runner"
-          bash torchchat/utils/scripts/build_native.sh et skip_et_install
+          bash torchchat/utils/scripts/build_native.sh et
       - name: Run inference
         run: |
           python torchchat.py download stories15M
@@ -1077,11 +1078,11 @@ jobs:
         continue-on-error: true
         run: |
           echo "Installing ExecuTorch"
-          bash torchchat/utils/scripts/build_native.sh et link_torchao
+          bash torchchat/utils/scripts/install_et.sh
       - name: Install runner
         run: |
           echo "Installing runner"
-          bash torchchat/utils/scripts/build_native.sh et skip_et_install link_torchao
+          bash torchchat/utils/scripts/build_native.sh et link_torchao
       - name: Install runner AOTI
         id: install-runner-aoti
         run: |
diff --git a/runner/build_android.sh b/runner/build_android.sh
index fab222a52..c0ad02d7b 100755
--- a/runner/build_android.sh
+++ b/runner/build_android.sh
@@ -41,5 +41,5 @@ install_executorch_python_libs $ENABLE_ET_PYBIND
 export CMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake
 export ANDROID_ABI=arm64-v8a
 export ANDROID_PLATFORM=android-23
-install_executorch
+install_executorch_cpp_libs
 build_runner_et
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
index 48396ba80..3f2984574 100755
--- a/torchchat/utils/scripts/build_native.sh
+++ b/torchchat/utils/scripts/build_native.sh
@@ -27,7 +27,6 @@ if [ $# -eq 0 ]; then
 fi
 
 LINK_TORCHAO=OFF
-SKIP_ET_INSTALL=OFF
 while (( "$#" )); do
   case "$1" in
     -h|--help)
@@ -49,11 +48,6 @@ while (( "$#" )); do
       LINK_TORCHAO=ON
       shift
       ;;
-    skip_et_install)
-      echo "Skipping ET install..."
-      SKIP_ET_INSTALL=ON
-      shift
-      ;;
     *)
       echo "Invalid option: $1"
       show_help
@@ -73,18 +67,23 @@ pushd ${TORCHCHAT_ROOT}
 git submodule update --init
 git submodule sync
 if [[ "$TARGET" == "et" ]]; then
-    find_cmake_prefix_path
-    if [[ "$SKIP_ET_INSTALL" == "OFF" ]]; then
-      install_pip_dependencies
-      clone_executorch
-      install_executorch_libs false
-    fi
+  if [ ! -d "${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install" ]; then
+    echo "Directory ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install does not exist."
+    echo "Make sure you run install_executorch_libs"
+    exit 1
+  fi
 
-    if [[ "$LINK_TORCHAO" == "ON" ]]; then
-      EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src"
-      EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
-      install_torchao_custom_executorch_ops
+  if [[ "$LINK_TORCHAO" == "ON" ]]; then
+    if [ ! -d "${TORCHCHAT_ROOT}/torchao-build" ]; then
+      echo "Directory ${TORCHCHAT_ROOT}/torchao-build does not exist."
+      echo "Make sure you run clone_torchao"
+      exit 1
     fi
+    find_cmake_prefix_path
+    EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src"
+    EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
+    install_torchao_custom_executorch_ops
+  fi
 fi
 popd
 
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 29be634f4..265332861 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -107,7 +107,7 @@ COMMON_CMAKE_ARGS="\
     -DEXECUTORCH_BUILD_XNNPACK=ON \
     -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON"
 
-install_executorch() {
+install_executorch_cpp_libs() {
   # AOT lib has to be build for model export
   # So by default it is built, and you can explicitly opt-out
   EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR=OFF
@@ -156,7 +156,7 @@ install_executorch() {
 }
 
 install_executorch_libs() {
-  install_executorch
+  install_executorch_cpp_libs
   install_executorch_python_libs $1
 }
 

From 9b7a1975b1da86451dd4b2d00f067f94184131c3 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:03:48 -0700
Subject: [PATCH 25/39] ci fixes

---
 .github/workflows/pull.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 40b567e7b..c29ffec6d 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -452,6 +452,7 @@ jobs:
           echo "Intalling pip3 packages"
           ./install/install_requirements.sh
 
+          export TORCHCHAT_ROOT=$PWD
           ./torchchat/utils/scripts/install_et.sh
 
           pip3 list

From c7984c63e5a7de3fdff2c49f86488e37a9bf4f79 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:11:41 -0700
Subject: [PATCH 26/39] ci update

---
 .github/workflows/pull.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index c29ffec6d..6f123114a 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -920,9 +920,9 @@ jobs:
       - name: Install ExecuTorch python
         run: |
           echo "Install ExecuTorch python"
-          export TORCHCHAT_ROOT=$PWD
-          source "torchchat/utils/scripts/install_utils.sh"
-          install_executorch_python_libs
+          pushd et-build/src/executorch
+          sh ./install_requirements.sh
+          popd
       - name: Install runner
         run: |
           echo "Installing runner"

From 832e96bb45a6ff0f248daba8da54afba672d1d63 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:32:47 -0700
Subject: [PATCH 27/39] update et pin

---
 .github/workflows/pull.yml | 4 +++-
 install/.pins/et-pin.txt   | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 6f123114a..987394222 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -921,7 +921,9 @@ jobs:
         run: |
           echo "Install ExecuTorch python"
           pushd et-build/src/executorch
-          sh ./install_requirements.sh
+          chmod +x ./install_requirements.sh
+          chmod +x ./install_requirements.py
+          ./install_requirements.sh
           popd
       - name: Install runner
         run: |
diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt
index 0a15fd2b5..af7ef4377 100644
--- a/install/.pins/et-pin.txt
+++ b/install/.pins/et-pin.txt
@@ -1 +1 @@
-58700faa262ddf45b223353c120ffaf6b2003711
+c75711cb329cab3df91fb9083a18373f9a568377

From db4e77ce0523251a69b74e94de18c5763d108157 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:38:06 -0700
Subject: [PATCH 28/39] update python version

---
 .github/workflows/pull.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 987394222..5da6c86e8 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -434,7 +434,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v2
         with:
-          python-version: 3.10.11
+          python-version: '3.10.11'
       - name: Setup Xcode
         if: runner.os == 'macOS'
         uses: maxim-lobanov/setup-xcode@v1
@@ -577,7 +577,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v2
         with:
-          python-version: 3.10.11
+          python-version: '3.10.11'
       - name: Print machine info
         run: |
           uname -a
@@ -624,7 +624,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable  # neeps MPS, was macos-m1-stable
-      python-version: 3.10.11
+      python-version: '3.10'
       script: |
         set -x
         # NS/MC: Remove previous installation of torch and torchao first
@@ -737,7 +737,7 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable  # needs MPS, was macos-m1-stable
-      python-version: 3.10.11
+      python-version: '3.10'
       script: |
         set -x
         # NS/MC: Remove previous installation of torch and torchao first
@@ -878,7 +878,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v2
         with:
-          python-version: 3.10.11
+          python-version: 3.10
       - name: Setup Xcode
         if: runner.os == 'macOS'
         uses: maxim-lobanov/setup-xcode@v1

From 461be776a03ccdba2e7c20ee36a5010cee11a0e4 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 14:42:06 -0700
Subject: [PATCH 29/39] update python version

---
 .github/workflows/pull.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 5da6c86e8..47d1a8d87 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -624,8 +624,8 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable  # neeps MPS, was macos-m1-stable
-      python-version: '3.10'
       script: |
+        export PYTHON_VERSION="3.10"
         set -x
         # NS/MC: Remove previous installation of torch and torchao first
         # as this script does not install anything into conda env but rather as system dep
@@ -737,8 +737,8 @@ jobs:
     uses: pytorch/test-infra/.github/workflows/macos_job.yml@main
     with:
       runner: macos-m1-stable  # needs MPS, was macos-m1-stable
-      python-version: '3.10'
       script: |
+        export PYTHON_VERSION="3.10"
         set -x
         # NS/MC: Remove previous installation of torch and torchao first
         # as this script does not install anything into conda env but rather as system dep
@@ -878,7 +878,7 @@ jobs:
       - name: Setup Python
         uses: actions/setup-python@v2
         with:
-          python-version: 3.10
+          python-version: 3.10.11
       - name: Setup Xcode
         if: runner.os == 'macOS'
         uses: maxim-lobanov/setup-xcode@v1

From eb58bf4be30b12b3ef754e8662161ccd4df12411 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 15:06:08 -0700
Subject: [PATCH 30/39] init

---
 .github/workflows/pull.yml                    | 90 -------------------
 docs/quantization.md                          | 60 -------------
 install/.pins/torchao-experimental-pin.txt    |  1 -
 runner/aoti.cmake                             |  4 -
 runner/et.cmake                               | 24 -----
 torchchat/utils/quantize.py                   | 47 +---------
 torchchat/utils/scripts/build_native.sh       | 22 +----
 .../scripts/build_torchao_experimental.sh     | 16 ----
 torchchat/utils/scripts/install_utils.sh      | 49 ----------
 9 files changed, 6 insertions(+), 307 deletions(-)
 delete mode 100644 install/.pins/torchao-experimental-pin.txt
 delete mode 100644 torchchat/utils/scripts/build_torchao_experimental.sh

diff --git a/.github/workflows/pull.yml b/.github/workflows/pull.yml
index 47d1a8d87..87e0825b3 100644
--- a/.github/workflows/pull.yml
+++ b/.github/workflows/pull.yml
@@ -1025,93 +1025,3 @@ jobs:
           git submodule update --init
           ./runner/build_android.sh
           echo "Tests complete."
-
-  test-torchao-experimental:
-    strategy:
-      matrix:
-        runner: [macos-14-xlarge]
-    runs-on: ${{matrix.runner}}
-    steps:
-      - name: Checkout repo
-        uses: actions/checkout@v3
-        with:
-          submodules: true
-      - name: Setup Python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.10.11
-      - name: Setup Xcode
-        if: runner.os == 'macOS'
-        uses: maxim-lobanov/setup-xcode@v1
-        with:
-          xcode-version: '15.3'
-      - name: Print machine info
-        run: |
-          uname -a
-          if [ $(uname -s) == Darwin ]; then
-            sysctl machdep.cpu.brand_string
-            sysctl machdep.cpu.core_count
-          fi
-      - name: Install torchchat
-        run: |
-          echo "Intalling pip3 packages"
-          ./install/install_requirements.sh
-          pip3 list
-          python3 -c 'import torch;print(f"torch: {torch.__version__, torch.version.git_version}")'
-      - name: Install torchao-experimental
-        id: install-torchao-experimental
-        run: |
-          bash torchchat/utils/scripts/build_torchao_experimental.sh
-      - name: Set git shas
-        id: setup-hash
-        run: |
-          export TORCHCHAT_ROOT=${PWD}
-          echo "et-git-hash=$(cat ${TORCHCHAT_ROOT}/install/.pins/et-pin.txt)" >> "$GITHUB_ENV"
-      - name: Load or install ET
-        id: install-et
-        uses: actions/cache@v3
-        env:
-          cache-key: et-build-${{runner.os}}-${{runner.arch}}-${{env.et-git-hash}}
-        with:
-          path: ./et-build
-          key: ${{env.cache-key}}
-          restore-keys: |
-            ${{env.cache-key}}
-      - if: ${{ steps.install-et.outputs.cache-hit != 'true' }}
-        continue-on-error: true
-        run: |
-          echo "Installing ExecuTorch"
-          bash torchchat/utils/scripts/install_et.sh
-      - name: Install runner
-        run: |
-          echo "Installing runner"
-          bash torchchat/utils/scripts/build_native.sh et link_torchao
-      - name: Install runner AOTI
-        id: install-runner-aoti
-        run: |
-          bash torchchat/utils/scripts/build_native.sh aoti link_torchao
-      - name: Run inference
-        run: |
-          python torchchat.py download stories110M
-          wget -O ./tokenizer.model https://github.com/karpathy/llama2.c/raw/master/tokenizer.model
-
-          export PRMT="Once upon a time in a land far away"
-
-          echo "Generate eager"
-          python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}"  --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
-
-          echo "Generate compile"
-          python torchchat.py generate stories110M --temperature 0 --prompt "${PRMT}"  --device cpu --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
-
-          echo "Export and run ET (C++ runner)"
-          python torchchat.py export stories110M --output-pte-path ./model.pte --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
-          ./cmake-out/et_run ./model.pte -z ./tokenizer.model -t 0 -i "${PRMT}"
-
-          echo "Export and run AOTI (C++ runner)"
-          python torchchat.py export stories110M --output-dso-path ./model.so --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
-          ./cmake-out/aoti_run ./model.so -z ./tokenizer.model -t 0 -i "${PRMT}"
-
-          echo "Generate AOTI"
-          python torchchat.py generate stories110M --dso-path ./model.so --prompt "${PRMT}"
-
-          echo "Tests complete."
diff --git a/docs/quantization.md b/docs/quantization.md
index aea8a8dc6..bac6e12cc 100644
--- a/docs/quantization.md
+++ b/docs/quantization.md
@@ -118,66 +118,6 @@ python3 torchchat.py export llama3 --quantize '{"embedding": {"bitwidth": 4, "gr
 python3 torchchat.py generate llama3 --pte-path llama3.pte  --prompt "Hello my name is"
 ```
 
-## Experimental TorchAO lowbit kernels
-
-### Use
-The quantization scheme a8wxdq dynamically quantizes activations to 8 bits, and quantizes the weights in a groupwise manner with a specified bitwidth and groupsize.
-It takes arguments bitwidth (2, 3, 4, 5, 6, 7), groupsize, and has_weight_zeros (true, false).
-The argument has_weight_zeros indicates whether the weights are quantized with scales only (has_weight_zeros: false) or with both scales and zeros (has_weight_zeros: true).
-Roughly speaking, {bitwidth: 4, groupsize: 256, has_weight_zeros: false} is similar to GGML's Q40 quantization scheme.
-
-You should expect high performance on ARM CPU if bitwidth is 2, 3, 4, or 5 and groupsize is divisible by 16.  With other platforms and argument choices, a slow fallback kernel will be used.  You will see warnings about this during quantization.
-
-### Setup
-To use a8wxdq, you must set up the torchao experimental kernels.  These will only work on devices with ARM CPUs, for example on Mac computers with Apple Silicon.
-
-From the torchchat root directory, run
-```
-sh torchchat/utils/scripts/build_torchao_experimental.sh
-```
-
-This should take about 10 seconds to complete.  Once finished, you can use a8wxdq in torchchat.
-
-Note: if you want to use the new kernels in the AOTI and C++ runners, you must pass the flag link_torchao when running the scripts the build the runners.
-
-```
-sh torchchat/utils/scripts/build_native.sh aoti link_torchao
-```
-
-```
-sh torchchat/utils/scripts/build_native.sh et link_torchao
-```
-
-### Examples
-
-#### Eager mode
-```
-python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}'
-```
-
-#### torch.compile
-```
-python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
-```
-
-As with PyTorch in general, you can experiment with performance on a difference number of threads by defining OMP_NUM_THREADS.  For example,
-
-```
-OMP_NUM_THREADS=6 python3 torchchat.py generate llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --compile
-```
-
-#### AOTI
-```
-python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-dso llama3.so
-python3 torchchat.py generate llama3 --dso-path llama3_1.so --prompt "Hello my name is"
-```
-
-#### ExecuTorch
-```
-python torchchat.py export llama3 --device cpu --dtype float32 --quantize '{"linear:a8wxdq": {"bitwidth": 4, "groupsize": 256, "has_weight_zeros": false}}' --output-pte llama3.pte
-```
-
-Note: only the ExecuTorch C++ runner in torchchat when built using the instructions in the setup can run the exported *.pte file.
 
 ## Quantization Profiles
 
diff --git a/install/.pins/torchao-experimental-pin.txt b/install/.pins/torchao-experimental-pin.txt
deleted file mode 100644
index 9b101777d..000000000
--- a/install/.pins/torchao-experimental-pin.txt
+++ /dev/null
@@ -1 +0,0 @@
-3fa38aaf1276e36845a82fb399e5054718a441c4
diff --git a/runner/aoti.cmake b/runner/aoti.cmake
index 35e4c1329..156e9bcce 100644
--- a/runner/aoti.cmake
+++ b/runner/aoti.cmake
@@ -28,7 +28,3 @@ if(Torch_FOUND)
     target_link_libraries(aoti_run "${TORCH_LIBRARIES}" m)
     set_property(TARGET aoti_run PROPERTY CXX_STANDARD 17)
 endif()
-
-if (LINK_TORCHAO_CUSTOM_OPS)
-    target_link_libraries(aoti_run "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_ATEN${CMAKE_SHARED_LIBRARY_SUFFIX}")
-endif()
diff --git a/runner/et.cmake b/runner/et.cmake
index 12c7fca02..27d799873 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -112,30 +112,6 @@ if(executorch_FOUND)
     target_link_libraries(et_run PRIVATE log)
   endif()
 
-  if(LINK_TORCHAO_CUSTOM_OPS)
-    # target_link_libraries(et_run PRIVATE "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH${CMAKE_SHARED_LIBRARY_SUFFIX}")
-    target_link_libraries(et_run PRIVATE "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/liblinear_a8wxdq_EXECUTORCH.a>")
-    target_link_libraries(et_run PRIVATE
-      "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_ops_linear_EXECUTORCH.a"
-      "${TORCHCHAT_ROOT}/torchao-build/cmake-out/lib/libtorchao_kernels_aarch64.a"
-    )
-  endif()
-
-  # Adding target_link_options_shared_lib as commented out below leads to this:
-  #
-  # CMake Error at Utils.cmake:22 (target_link_options):
-  #   Cannot specify link options for target
-  #   "/Users/scroy/etorch/torchchat/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a"
-  #   which is not built by this project.
-  # Call Stack (most recent call first):
-  #   Utils.cmake:30 (macos_kernel_link_options)
-  #   CMakeLists.txt:41 (target_link_options_shared_lib)
-  #
-  #target_link_options_shared_lib("${TORCHCHAT_ROOT}/et-build/src/executorch/${CMAKE_OUT_DIR}/examples/models/llama2/custom_ops/libcustom_ops_lib.a") # This one does not get installed by ExecuTorch
-
-  # This works on mac, but appears to run into issues on linux
-  # It is needed to solve:
-  # E 00:00:00.055965 executorch:method.cpp:536] Missing operator: [8] llama::sdpa_with_kv_cache.out
 else()
   MESSAGE(WARNING "ExecuTorch package not found")
 endif()
diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py
index 041f074c2..a0d9248a9 100644
--- a/torchchat/utils/quantize.py
+++ b/torchchat/utils/quantize.py
@@ -96,19 +96,10 @@ def quantize_model(
                 precision = get_precision()
 
             try:
-                if quantizer == "linear:a8wxdq":
-                    quant_handler = ao_quantizer_class_dict[quantizer](
-                        device=device,
-                        precision=precision,
-                        bitwidth=q_kwargs.get("bitwidth", 4),
-                        groupsize=q_kwargs.get("groupsize", 128),
-                        has_weight_zeros=q_kwargs.get("has_weight_zeros", False),
-                    )
-                else:
-                    # Easier to ask forgiveness than permission
-                    quant_handler = ao_quantizer_class_dict[quantizer](
-                        groupsize=q_kwargs["groupsize"], device=device, precision=precision
-                    )
+                # Easier to ask forgiveness than permission
+                quant_handler = ao_quantizer_class_dict[quantizer](
+                    groupsize=q_kwargs["groupsize"], device=device, precision=precision
+                )
             except TypeError as e:
                 if "unexpected keyword argument 'device'" in str(e):
                     quant_handler = ao_quantizer_class_dict[quantizer](
@@ -870,33 +861,3 @@ def quantized_model(self) -> nn.Module:
     "linear:int4": Int4WeightOnlyQuantizer,
     "linear:a8w4dq": Int8DynActInt4WeightQuantizer,
 }
-
-try:
-    import importlib.util
-    import sys
-    import os
-    torchao_build_path = f"{os.getcwd()}/torchao-build"
-
-    # Try loading quantizer
-    torchao_experimental_quant_api_spec = importlib.util.spec_from_file_location(
-        "torchao_experimental_quant_api",
-        f"{torchao_build_path}/src/ao/torchao/experimental/quant_api.py",
-    )
-    torchao_experimental_quant_api = importlib.util.module_from_spec(torchao_experimental_quant_api_spec)
-    sys.modules["torchao_experimental_quant_api"] = torchao_experimental_quant_api
-    torchao_experimental_quant_api_spec.loader.exec_module(torchao_experimental_quant_api)
-    from torchao_experimental_quant_api import Int8DynActIntxWeightQuantizer
-    ao_quantizer_class_dict["linear:a8wxdq"] = Int8DynActIntxWeightQuantizer
-
-    # Try loading custom op
-    try:
-        import glob
-        libs = glob.glob(f"{torchao_build_path}/cmake-out/lib/liblinear_a8wxdq_ATEN.*")
-        libs = list(filter(lambda l: (l.endswith("so") or l.endswith("dylib")), libs))
-        torch.ops.load_library(libs[0])
-    except Exception as e:
-        print("Failed to torchao custom op library with error: ", e)
-        print("Slow fallback kernels will be used.")
-
-except Exception as e:
-    print(f"Failed to load torchao experimental a8wxdq quantizer with error: {e}")
diff --git a/torchchat/utils/scripts/build_native.sh b/torchchat/utils/scripts/build_native.sh
index 3f2984574..924b86a65 100755
--- a/torchchat/utils/scripts/build_native.sh
+++ b/torchchat/utils/scripts/build_native.sh
@@ -26,7 +26,6 @@ if [ $# -eq 0 ]; then
     exit 1
 fi
 
-LINK_TORCHAO=OFF
 while (( "$#" )); do
   case "$1" in
     -h|--help)
@@ -43,11 +42,6 @@ while (( "$#" )); do
       TARGET="et"
       shift
       ;;
-    link_torchao)
-      echo "Linking with torchao custom ops..."
-      LINK_TORCHAO=ON
-      shift
-      ;;
     *)
       echo "Invalid option: $1"
       show_help
@@ -72,26 +66,14 @@ if [[ "$TARGET" == "et" ]]; then
     echo "Make sure you run install_executorch_libs"
     exit 1
   fi
-
-  if [[ "$LINK_TORCHAO" == "ON" ]]; then
-    if [ ! -d "${TORCHCHAT_ROOT}/torchao-build" ]; then
-      echo "Directory ${TORCHCHAT_ROOT}/torchao-build does not exist."
-      echo "Make sure you run clone_torchao"
-      exit 1
-    fi
-    find_cmake_prefix_path
-    EXECUTORCH_INCLUDE_DIRS="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/include;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src"
-    EXECUTORCH_LIBRARIES="${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libexecutorch_no_prim_ops.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libextension_threadpool.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcpuinfo.a;${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libpthreadpool.a"
-    install_torchao_custom_executorch_ops
-  fi
 fi
 popd
 
 # CMake commands
 if [[ "$TARGET" == "et" ]]; then
-    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
+    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DET_USE_ADAPTIVE_THREADS=ON -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=1" -G Ninja
 else
-    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DLINK_TORCHAO_CUSTOM_OPS="${LINK_TORCHAO}" -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja
+    cmake -S . -B ./cmake-out -DCMAKE_PREFIX_PATH=`python3 -c 'import torch;print(torch.utils.cmake_prefix_path)'` -DCMAKE_CXX_FLAGS="-D_GLIBCXX_USE_CXX11_ABI=0" -G Ninja
 fi
 cmake --build ./cmake-out --target "${TARGET}"_run
 
diff --git a/torchchat/utils/scripts/build_torchao_experimental.sh b/torchchat/utils/scripts/build_torchao_experimental.sh
deleted file mode 100644
index 1df3e80c6..000000000
--- a/torchchat/utils/scripts/build_torchao_experimental.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/bash
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-
-
-source "$(dirname "${BASH_SOURCE[0]}")/install_utils.sh"
-
-pushd ${TORCHCHAT_ROOT}
-find_cmake_prefix_path
-clone_torchao
-install_torchao_custom_aten_ops
-popd
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 265332861..fafc8eccd 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -159,52 +159,3 @@ install_executorch_libs() {
   install_executorch_cpp_libs
   install_executorch_python_libs $1
 }
-
-clone_torchao() {
-  echo "Cloning torchao to ${TORCHCHAT_ROOT}/torchao-build/src"
-  rm -rf ${TORCHCHAT_ROOT}/torchao-build/src
-  mkdir -p ${TORCHCHAT_ROOT}/torchao-build/src
-  pushd ${TORCHCHAT_ROOT}/torchao-build/src
-  echo $pwd
-
-  cp -R ${HOME}/fbsource/fbcode/pytorch/ao .
-  # git clone https://github.com/pytorch/ao.git
-  # cd ao
-  # git checkout $(cat ${TORCHCHAT_ROOT}/intstall/.pins/torchao-experimental-pin.txt)
-
-  popd
-}
-
-install_torchao_custom_aten_ops() {
-  echo "Building torchao custom ops for ATen"
-  pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental
-
-  CMAKE_OUT_DIR=${TORCHCHAT_ROOT}/torchao-build/cmake-out
-  cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-    -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
-    -DCMAKE_BUILD_TYPE="Release" \
-    -DTORCHAO_OP_TARGET="ATEN" \
-    -S . \
-    -B ${CMAKE_OUT_DIR} -G Ninja
-  cmake --build  ${CMAKE_OUT_DIR} --target install --config Release
-
-  popd
-}
-
-install_torchao_custom_executorch_ops() {
-  echo "Building torchao custom ops for ExecuTorch"
-  pushd ${TORCHCHAT_ROOT}/torchao-build/src/ao/torchao/experimental
-
-  CMAKE_OUT_DIR="${TORCHCHAT_ROOT}/torchao-build/cmake-out"
-  cmake -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
-    -DCMAKE_INSTALL_PREFIX=${CMAKE_OUT_DIR} \
-    -DCMAKE_BUILD_TYPE="Release" \
-    -DTORCHAO_OP_TARGET="EXECUTORCH" \
-    -DEXECUTORCH_INCLUDE_DIRS="${EXECUTORCH_INCLUDE_DIRS}" \
-    -DEXECUTORCH_LIBRARIES="${EXECUTORCH_LIBRARIES}" \
-    -S . \
-    -B ${CMAKE_OUT_DIR} -G Ninja
-  cmake --build  ${CMAKE_OUT_DIR} --target install --config Release
-
-  popd
-}

From cb2238d5fefbeccef716008b484db83c35819e35 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 15:09:23 -0700
Subject: [PATCH 31/39] typo

---
 .gitignore           | 1 -
 docs/quantization.md | 1 -
 2 files changed, 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index ee856fcd2..3f25b76c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,6 @@ __pycache__/
 # Build directories
 build/android/*
 et-build/*
-torchao-build/*
 runner-et/cmake-out/*
 runner-aoti/cmake-out/*
 cmake-out/
diff --git a/docs/quantization.md b/docs/quantization.md
index bac6e12cc..1f619e58e 100644
--- a/docs/quantization.md
+++ b/docs/quantization.md
@@ -118,7 +118,6 @@ python3 torchchat.py export llama3 --quantize '{"embedding": {"bitwidth": 4, "gr
 python3 torchchat.py generate llama3 --pte-path llama3.pte  --prompt "Hello my name is"
 ```
 
-
 ## Quantization Profiles
 
 Four [sample profiles](https://github.com/pytorch/torchchat/tree/main/torchchat/quant_config/) are included with the torchchat distribution: `cuda.json`, `desktop.json`, `mobile.json`, `pi5.json`

From d974f5008210c7123d75b271b4040afb2a252622 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 16:39:57 -0700
Subject: [PATCH 32/39] update pytorch pin

---
 install/.pins/et-pin.txt                 | 2 +-
 install/install_requirements.sh          | 4 ++--
 torchchat/utils/scripts/install_utils.sh | 1 +
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/install/.pins/et-pin.txt b/install/.pins/et-pin.txt
index af7ef4377..01c77f102 100644
--- a/install/.pins/et-pin.txt
+++ b/install/.pins/et-pin.txt
@@ -1 +1 @@
-c75711cb329cab3df91fb9083a18373f9a568377
+af098c31b6f8d5f38e40a5cf35784b0969d97df8
diff --git a/install/install_requirements.sh b/install/install_requirements.sh
index b698315ff..47fd5b36d 100755
--- a/install/install_requirements.sh
+++ b/install/install_requirements.sh
@@ -47,10 +47,10 @@ fi
 # NOTE: If a newly-fetched version of the executorch repo changes the value of
 # PYTORCH_NIGHTLY_VERSION, you should re-run this script to install the necessary
 # package versions.
-PYTORCH_NIGHTLY_VERSION=dev20240814
+PYTORCH_NIGHTLY_VERSION=dev20240901
 
 # Nightly version for torchvision
-VISION_NIGHTLY_VERSION=dev20240814
+VISION_NIGHTLY_VERSION=dev20240901
 
 # Nightly version for torchtune
 TUNE_NIGHTLY_VERSION=dev20240916
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index fafc8eccd..06d978cfa 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -75,6 +75,7 @@ clone_executorch() {
   clone_executorch_internal
 }
 
+
 install_executorch_python_libs() {
   if [ ! -d "${TORCHCHAT_ROOT}/${ET_BUILD_DIR}" ]; then
     echo "Directory ${TORCHCHAT_ROOT}/${ET_BUILD_DIR} does not exist."

From efe74b8dc8f2401e4cd2d4d79a268749b1a922d4 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Thu, 19 Sep 2024 17:05:14 -0700
Subject: [PATCH 33/39] tensor_ptr arg order change

---
 runner/run.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/runner/run.cpp b/runner/run.cpp
index 99eb7bfb9..52d596749 100644
--- a/runner/run.cpp
+++ b/runner/run.cpp
@@ -213,8 +213,8 @@ float* forward(Transformer* transformer, int token, int pos) {
                              .to(torch::kCPU);
   auto logits = result[0].data_ptr();
 #else // __ET_MODEL__
-  TensorPtr pos_managed = make_tensor_ptr(ScalarType::Long, {1}, pos_buffer);
-  TensorPtr tokens_managed = make_tensor_ptr(ScalarType::Long, {1, 1}, token_buffer);
+  TensorPtr pos_managed = make_tensor_ptr({1}, pos_buffer, ScalarType::Long);
+  TensorPtr tokens_managed = make_tensor_ptr({1, 1}, token_buffer, ScalarType::Long);
   std::vector<EValue> inputs;
   auto tmp1 = EValue(tokens_managed);
   auto tmp2 = EValue(pos_managed);

From 501675575e2d08ff895baf7537e8354bba24a206 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Fri, 20 Sep 2024 10:37:37 -0700
Subject: [PATCH 34/39] fixes

---
 install/requirements.txt | 2 +-
 runner/et.cmake          | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/install/requirements.txt b/install/requirements.txt
index 7bb3b74b5..1647ce00e 100644
--- a/install/requirements.txt
+++ b/install/requirements.txt
@@ -12,7 +12,7 @@ tiktoken
 # Miscellaneous
 snakeviz
 sentencepiece
-numpy>=1.23.5,<2.0
+numpy<2.0
 gguf
 lm-eval==0.4.2
 blobfile
diff --git a/runner/et.cmake b/runner/et.cmake
index 27d799873..9024182a1 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -62,6 +62,7 @@ if(executorch_FOUND)
 
     set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch)
     set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack)
+    list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
     list(APPEND _common_include_directories
          ${XNNPACK_ROOT}/third-party/cpuinfo/include)
 

From 4ce0de853225aa942f3f057f101d473938e0b2a6 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Fri, 20 Sep 2024 10:50:16 -0700
Subject: [PATCH 35/39] update

---
 runner/et.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runner/et.cmake b/runner/et.cmake
index 9024182a1..0f8c8e908 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -62,7 +62,7 @@ if(executorch_FOUND)
 
     set(EXECUTORCH_SRC_ROOT ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/src/executorch)
     set(XNNPACK_ROOT ${EXECUTORCH_SRC_ROOT}/backends/xnnpack)
-    list(APPEND _srcs ${XNNPACK_ROOT}/threadpool/cpuinfo_utils.cpp)
+    list(APPEND _srcs ${EXECUTORCH_SRC_ROOT}/extension/threadpool/cpuinfo_utils.cpp)
     list(APPEND _common_include_directories
          ${XNNPACK_ROOT}/third-party/cpuinfo/include)
 

From db88dc84e5d9a3d2f7baf8d316dcbcd9f06f4d9f Mon Sep 17 00:00:00 2001
From: Mengwei Liu <larryliu@meta.com>
Date: Mon, 23 Sep 2024 10:11:00 -0700
Subject: [PATCH 36/39] Add import for quantized decomposed ops

Summary:

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 torchchat/model.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/torchchat/model.py b/torchchat/model.py
index ea7710a27..81c06e495 100644
--- a/torchchat/model.py
+++ b/torchchat/model.py
@@ -932,6 +932,9 @@ def apply_rotary_emb(x: Tensor, freqs_cis: Tensor) -> Tensor:
     from executorch.extension.pybindings import portable_lib as exec_lib
 
     # ET changed the way it's loading the custom ops so it's not included in portable_lib but has to be loaded separately.
+    # For quantized_decomposed ops
+    from executorch.kernels import quantized  # no-qa
+    # For llama::sdpa_with_kv_cache.out, preprocess ops
     from executorch.extension.llm.custom_ops import sdpa_with_kv_cache  # no-qa
 
     class PTEModel(nn.Module):

From 517ec519d348f22ac67356a3b32d8a3557cf1638 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Mon, 23 Sep 2024 10:40:02 -0700
Subject: [PATCH 37/39] remove whole archive from custom_op

---
 install/requirements.txt                 | 2 +-
 runner/et.cmake                          | 5 +++--
 torchchat/utils/scripts/install_et.sh    | 2 +-
 torchchat/utils/scripts/install_utils.sh | 3 ++-
 4 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/install/requirements.txt b/install/requirements.txt
index 1647ce00e..bbb1d56d1 100644
--- a/install/requirements.txt
+++ b/install/requirements.txt
@@ -12,7 +12,7 @@ tiktoken
 # Miscellaneous
 snakeviz
 sentencepiece
-numpy<2.0
+numpy < 2.0
 gguf
 lm-eval==0.4.2
 blobfile
diff --git a/runner/et.cmake b/runner/et.cmake
index 0f8c8e908..0fbac2fce 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -95,10 +95,13 @@ if(executorch_FOUND)
     XNNPACK
     pthreadpool
     cpuinfo
+    custom_ops
   )
   target_link_options_shared_lib(optimized_native_cpu_ops_lib)
   target_link_options_shared_lib(quantized_ops_lib)
   target_link_options_shared_lib(xnnpack_backend)
+  target_link_options_shared_lib(custom_ops)
+
   # Not clear why linking executorch as whole-archive outside android/apple is leading
   # to double registration. Most likely because of linkage issues.
   # Will figure this out later. Until then use this.
@@ -106,8 +109,6 @@ if(executorch_FOUND)
     target_link_options_shared_lib(executorch)
   endif()
 
-  target_link_libraries(et_run PRIVATE
-  "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcustom_ops.a>")
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)
diff --git a/torchchat/utils/scripts/install_et.sh b/torchchat/utils/scripts/install_et.sh
index 22c3ac80a..1d8c6e2b2 100755
--- a/torchchat/utils/scripts/install_et.sh
+++ b/torchchat/utils/scripts/install_et.sh
@@ -17,7 +17,7 @@ ENABLE_ET_PYBIND="${1:-true}"
 
 pushd ${TORCHCHAT_ROOT}
 find_cmake_prefix_path
-install_pip_dependencies
 clone_executorch
 install_executorch_libs $ENABLE_ET_PYBIND
+install_executorch_python_libs $ENABLE_ET_PYBIND
 popd
diff --git a/torchchat/utils/scripts/install_utils.sh b/torchchat/utils/scripts/install_utils.sh
index 06d978cfa..2da3d044c 100644
--- a/torchchat/utils/scripts/install_utils.sh
+++ b/torchchat/utils/scripts/install_utils.sh
@@ -148,7 +148,6 @@ install_executorch_cpp_libs() {
         -DCMAKE_PREFIX_PATH=${MY_CMAKE_PREFIX_PATH} \
         -DEXECUTORCH_BUILD_KERNELS_CUSTOM_AOT=${EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR} \
         -DEXECUTORCH_BUILD_KERNELS_CUSTOM=${EXECUTORCH_BUILD_KERNELS_CUSTOM_VAR} \
-        -DEXECUTORCH_BUILD_XNNPACK=ON \
         ${CROSS_COMPILE_ARGS} \
         -S . -B ${CMAKE_OUT_DIR} -G Ninja
   cmake --build ${CMAKE_OUT_DIR}
@@ -157,6 +156,8 @@ install_executorch_cpp_libs() {
 }
 
 install_executorch_libs() {
+  EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT_VAR=OFF
+  EXECUTORCH_BUILD_KERNELS_CUSTOM_VAR=OFF
   install_executorch_cpp_libs
   install_executorch_python_libs $1
 }

From e5a325ea635bc43a1757d370f31af0a73309addd Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Mon, 23 Sep 2024 10:52:16 -0700
Subject: [PATCH 38/39] add imported lib

---
 runner/et.cmake | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/runner/et.cmake b/runner/et.cmake
index 0fbac2fce..2aa3efd6f 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -69,6 +69,8 @@ if(executorch_FOUND)
     list(APPEND _common_include_directories
          ${XNNPACK_ROOT}/third-party/pthreadpool/include)
   endif()
+  add_library(custom_ops STATIC IMPORTED)
+  set_property(TARGET custom_ops PROPERTY IMPORTED_LOCATION ${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcustom_ops.a)
 
   target_include_directories(executorch INTERFACE ${_common_include_directories}) # Ideally ExecuTorch installation process would do this
   add_executable(et_run ${_srcs})
@@ -109,6 +111,8 @@ if(executorch_FOUND)
     target_link_options_shared_lib(executorch)
   endif()
 
+  # target_link_libraries(et_run PRIVATE
+  # "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcustom_ops.a>")
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)

From c79397b8da6874e9601707c00c22d7119ff773b6 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Mon, 23 Sep 2024 10:52:32 -0700
Subject: [PATCH 39/39] et.cmake: remove commented-out whole-archive link of libcustom_ops.a

---
 runner/et.cmake | 2 --
 1 file changed, 2 deletions(-)

diff --git a/runner/et.cmake b/runner/et.cmake
index 2aa3efd6f..99e67a025 100644
--- a/runner/et.cmake
+++ b/runner/et.cmake
@@ -111,8 +111,6 @@ if(executorch_FOUND)
     target_link_options_shared_lib(executorch)
   endif()
 
-  # target_link_libraries(et_run PRIVATE
-  # "$<LINK_LIBRARY:WHOLE_ARCHIVE,${TORCHCHAT_ROOT}/${ET_BUILD_DIR}/install/lib/libcustom_ops.a>")
   # This one is needed for cpuinfo where it uses android specific log lib
   if(ANDROID)
     target_link_libraries(et_run PRIVATE log)