From 8d67342646d0686c72c801db083fedf412b616a2 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 10 Sep 2025 12:33:21 -0700
Subject: [PATCH 1/3] Bump torchao pin and use v2 torchao tensors

---
 backends/vulkan/test/test_vulkan_delegate.py |  7 +++++--
 backends/xnnpack/test/ops/test_linear.py     | 10 ++++++++--
 .../llama/source_transformation/quantize.py  | 24 +++++++++++++++++-------
 third-party/ao                               |  2 +-
 4 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index 01547d7140d..f8194f0b32c 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -2680,14 +2680,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
     def apply_8da4w_quantization(self):
         """Apply TorchAO 8da4w quantization (int8 dynamic activation + int4 weight)."""
         from torchao.quantization import (
-            int8_dynamic_activation_int4_weight,
+            Int8DynamicActivationIntxWeightConfig,
             quantize_,
         )
+        from torchao.quantization.granularity import PerGroup
         from torchao.utils import unwrap_tensor_subclass
 
         quantize_(
             self,
-            int8_dynamic_activation_int4_weight(group_size=self.group_size),
+            Int8DynamicActivationIntxWeightConfig(
+                weight_dtype=torch.int4, weight_granularity=PerGroup(self.group_size)
+            ),
         )
         unwrap_tensor_subclass(self)
         return self
diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py
index 421e59c0b08..ac6fec25732 100644
--- a/backends/xnnpack/test/ops/test_linear.py
+++ b/backends/xnnpack/test/ops/test_linear.py
@@ -34,8 +34,9 @@
 from torch.export.graph_signature import ExportGraphSignature, InputKind
 
 try:
+    from torchao.quantization.granularity import PerGroup
     from torchao.quantization.quant_api import (
-        int8_dynamic_activation_int4_weight,
+        Int8DynamicActivationIntxWeightConfig,
         quantize_,
     )
     from torchao.utils import unwrap_tensor_subclass
@@ -391,7 +392,12 @@ def _test_groupwise_dq_linear(
         """
         Helper function to test groupwise dynamic quantized linear op with different configurations.
         """
-        quantize_(mod, int8_dynamic_activation_int4_weight(group_size=group_size))
+        quantize_(
+            mod,
+            Int8DynamicActivationIntxWeightConfig(
+                weight_dtype=torch.int4, weight_granularity=PerGroup(group_size)
+            ),
+        )
         unwrap_tensor_subclass(mod)
         DynamicallyQuantizedPartitioner = XnnpackPartitioner(
             config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py
index 9f2210b5c64..835972b7f3e 100644
--- a/examples/models/llama/source_transformation/quantize.py
+++ b/examples/models/llama/source_transformation/quantize.py
@@ -116,7 +116,6 @@ def quantize(  # noqa C901
         assert len(matches) == 1, f"Expected 1 match for pattern but got {len(matches)}"
         bitwidth = int(matches[0][0])
 
-        from torchao.dtypes import PackedLinearInt8DynamicActivationIntxWeightLayout
         from torchao.quantization.granularity import PerAxis, PerGroup
         from torchao.quantization.quant_api import (
             Int8DynamicActivationIntxWeightConfig,
@@ -136,7 +135,7 @@ def quantize(  # noqa C901
                     PerAxis(0) if group_size == 0 else PerGroup(group_size)
                 ),
                 weight_mapping_type=MappingType.SYMMETRIC,
-                layout=PackedLinearInt8DynamicActivationIntxWeightLayout(),
+                intx_packing_format="opaque_torchao_auto",
             ),
         )
         model = unwrap_tensor_subclass(model)
@@ -148,10 +147,21 @@ def quantize(  # noqa C901
         # TODO: Default value for group size for 8da4w.
         # Need this here for refactor, will clean this up.
         group_size = 128
 
-        from torchao.quantization import int8_dynamic_activation_int4_weight, quantize_
+        from torchao.quantization import (
+            Int8DynamicActivationIntxWeightConfig,
+            quantize_,
+        )
+        from torchao.quantization.granularity import PerGroup
         from torchao.utils import unwrap_tensor_subclass
 
-        quantize_(model, int8_dynamic_activation_int4_weight(group_size=group_size))
+        quantize_(
+            model,
+            Int8DynamicActivationIntxWeightConfig(
+                weight_dtype=torch.int4,
+                weight_granularity=PerGroup(group_size),
+            ),
+        )
+        model = unwrap_tensor_subclass(model)
 
         # TODO: deal with checkpoint / computation dtype decoupling.
@@ -744,9 +754,9 @@ def get_quant_embedding_transform(
     dtype_override: Optional[DType] = None,
 ):
     if embedding_quantize.startswith("torchao:"):
-        from torchao.experimental.quant_api import (
+        from torchao.prototype.quantization.embedding.api import (
             EmbeddingQuantizer,
-            SharedEmbeddingQuantizer,
+            TiedEmbeddingQuantizer,
         )
         from torchao.quantization.granularity import PerAxis, PerGroup
         from torchao.quantization.quant_api import MappingType
@@ -780,7 +790,7 @@ def _torchao_embedding_quantizer(model):
                 use_fallback=False,
             ).quantize(model)
         else:
-            SharedEmbeddingQuantizer(
+            TiedEmbeddingQuantizer(
                 weight_dtype=weight_dtype,
                 granularity=granularity,
                 mapping_type=mapping_type,
diff --git a/third-party/ao b/third-party/ao
index f1acc1e2ade..b99904b34c0 160000
--- a/third-party/ao
+++ b/third-party/ao
@@ -1 +1 @@
-Subproject commit f1acc1e2ade01fef0129a3cee62b3d8e14e22602
+Subproject commit b99904b34c0fd98f8a63ec57cbc1dc4993f74793

From 0bb9e5c4a508e39c2b85c24344fbb3fb237d0298 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 10 Sep 2025 14:55:19 -0700
Subject: [PATCH 2/3] up

---
 backends/apple/coreml/test/test_coreml_recipes.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backends/apple/coreml/test/test_coreml_recipes.py b/backends/apple/coreml/test/test_coreml_recipes.py
index 313e24922d6..e66e800626f 100644
--- a/backends/apple/coreml/test/test_coreml_recipes.py
+++ b/backends/apple/coreml/test/test_coreml_recipes.py
@@ -3,6 +3,7 @@
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.
 
+import copy
 import unittest
 
 import coremltools as ct
 
@@ -152,8 +153,9 @@ def forward(self, x):
         # Test with different group sizes
         for group_size in [8, 16, 32]:
             with self.subTest(group_size=group_size):
+                model_to_export = copy.deepcopy(model)
                 session = export(
-                    model=model,
+                    model=model_to_export,
                     example_inputs=example_inputs,
                     export_recipe=ExportRecipe.get_recipe(
                         CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,

From 46cb5b67ef52a38efb01b95b44cbab921b2b64eb Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 10 Sep 2025 16:23:48 -0700
Subject: [PATCH 3/3] up

---
 backends/apple/coreml/test/test_coreml_recipes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backends/apple/coreml/test/test_coreml_recipes.py b/backends/apple/coreml/test/test_coreml_recipes.py
index e66e800626f..303d8cb78ed 100644
--- a/backends/apple/coreml/test/test_coreml_recipes.py
+++ b/backends/apple/coreml/test/test_coreml_recipes.py
@@ -221,8 +221,9 @@ def forward(self, x):
         # Test with different group sizes
         for group_size in [16, 32, 64]:
             with self.subTest(group_size=group_size):
+                model_to_export = copy.deepcopy(model)
                 session = export(
-                    model=model,
+                    model=model_to_export,
                     example_inputs=example_inputs,
                     export_recipe=ExportRecipe.get_recipe(
                         CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
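
The three call sites touched by PATCH 1/3 all perform the same v1-to-v2 torchao migration, so it is worth seeing once in isolation. A minimal before/after sketch, using only the API the patch itself imports and assuming it is available at the pinned torchao commit:

    import torch
    import torch.nn as nn

    from torchao.quantization import Int8DynamicActivationIntxWeightConfig, quantize_
    from torchao.quantization.granularity import PerGroup
    from torchao.utils import unwrap_tensor_subclass

    model = nn.Sequential(nn.Linear(256, 256))

    # v1 API removed by this series:
    #   from torchao.quantization import int8_dynamic_activation_int4_weight
    #   quantize_(model, int8_dynamic_activation_int4_weight(group_size=32))

    # v2 API: int8 dynamic activation + int4 weight, quantized per group of 32.
    quantize_(
        model,
        Int8DynamicActivationIntxWeightConfig(
            weight_dtype=torch.int4,
            weight_granularity=PerGroup(32),
        ),
    )
    model = unwrap_tensor_subclass(model)

Per the quantize.py hunk above, passing PerAxis(0) instead of PerGroup(n) selects the channelwise variant used when group_size == 0.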
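PATCH 2/3 and PATCH 3/3 are a test-isolation fix rather than an API change: the deepcopy guards against export() mutating the module it receives, which would otherwise make every loop iteration after the first re-export an already-quantized model. A toy sketch of the hazard and the fix, with a hypothetical lower() standing in for export() and the CoreML recipe:

    import copy

    import torch.nn as nn

    def lower(model: nn.Module) -> nn.Module:
        # Stand-in for export(...): mutates the module it is given.
        for p in model.parameters():
            p.data = p.data.round()  # destructive, like in-place quantization
        return model

    base = nn.Linear(4, 4)
    for group_size in (8, 16, 32):
        # Fresh copy per iteration; without it, each pass would lower weights
        # already mutated by the previous one. In the real test, group_size
        # feeds the export recipe.
        model_to_export = copy.deepcopy(base)
        lowered = lower(model_to_export)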