From 8d67342646d0686c72c801db083fedf412b616a2 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 10 Sep 2025 12:33:21 -0700
Subject: [PATCH 1/3] Bump torchao pin and use v2 torchao tensors

---
 backends/vulkan/test/test_vulkan_delegate.py |  7 +++++--
 backends/xnnpack/test/ops/test_linear.py     | 10 ++++++++--
 .../llama/source_transformation/quantize.py  | 24 +++++++++++++++++-------
 third-party/ao                               |  2 +-
 4 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/backends/vulkan/test/test_vulkan_delegate.py b/backends/vulkan/test/test_vulkan_delegate.py
index 01547d7140d..f8194f0b32c 100644
--- a/backends/vulkan/test/test_vulkan_delegate.py
+++ b/backends/vulkan/test/test_vulkan_delegate.py
@@ -2680,14 +2680,17 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
     def apply_8da4w_quantization(self):
         """Apply TorchAO 8da4w quantization (int8 dynamic activation + int4 weight)."""
         from torchao.quantization import (
-            int8_dynamic_activation_int4_weight,
+            Int8DynamicActivationIntxWeightConfig,
             quantize_,
         )
+        from torchao.quantization.granularity import PerGroup
         from torchao.utils import unwrap_tensor_subclass
 
         quantize_(
             self,
-            int8_dynamic_activation_int4_weight(group_size=self.group_size),
+            Int8DynamicActivationIntxWeightConfig(
+                weight_dtype=torch.int4, weight_granularity=PerGroup(self.group_size)
+            ),
         )
         unwrap_tensor_subclass(self)
         return self
diff --git a/backends/xnnpack/test/ops/test_linear.py b/backends/xnnpack/test/ops/test_linear.py
index 421e59c0b08..ac6fec25732 100644
--- a/backends/xnnpack/test/ops/test_linear.py
+++ b/backends/xnnpack/test/ops/test_linear.py
@@ -34,8 +34,9 @@
 from torch.export.graph_signature import ExportGraphSignature, InputKind
 
 try:
+    from torchao.quantization.granularity import PerGroup
     from torchao.quantization.quant_api import (
-        int8_dynamic_activation_int4_weight,
+        Int8DynamicActivationIntxWeightConfig,
         quantize_,
     )
     from torchao.utils import unwrap_tensor_subclass
@@ -391,7 +392,12 @@ def _test_groupwise_dq_linear(
         """
         Helper function to test groupwise dynamic quantized linear op with different configurations.
         """
-        quantize_(mod, int8_dynamic_activation_int4_weight(group_size=group_size))
+        quantize_(
+            mod,
+            Int8DynamicActivationIntxWeightConfig(
+                weight_dtype=torch.int4, weight_granularity=PerGroup(group_size)
+            ),
+        )
         unwrap_tensor_subclass(mod)
         DynamicallyQuantizedPartitioner = XnnpackPartitioner(
             config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
diff --git a/examples/models/llama/source_transformation/quantize.py b/examples/models/llama/source_transformation/quantize.py
index 9f2210b5c64..835972b7f3e 100644
--- a/examples/models/llama/source_transformation/quantize.py
+++ b/examples/models/llama/source_transformation/quantize.py
@@ -116,7 +116,6 @@ def quantize(  # noqa C901
         assert len(matches) == 1, f"Expected 1 match for pattern but got {len(matches)}"
         bitwidth = int(matches[0][0])
 
-        from torchao.dtypes import PackedLinearInt8DynamicActivationIntxWeightLayout
         from torchao.quantization.granularity import PerAxis, PerGroup
         from torchao.quantization.quant_api import (
             Int8DynamicActivationIntxWeightConfig,
@@ -136,7 +135,7 @@ def quantize(  # noqa C901
                     PerAxis(0) if group_size == 0 else PerGroup(group_size)
                 ),
                 weight_mapping_type=MappingType.SYMMETRIC,
-                layout=PackedLinearInt8DynamicActivationIntxWeightLayout(),
+                intx_packing_format="opaque_torchao_auto",
             ),
         )
         model = unwrap_tensor_subclass(model)
@@ -148,10 +147,21 @@ def quantize(  # noqa C901
         # TODO: Default value for group size for 8da4w.
         # Need this here for refactor, will clean this up.
         group_size = 128
 
-        from torchao.quantization import int8_dynamic_activation_int4_weight, quantize_
+        from torchao.quantization import (
+            Int8DynamicActivationIntxWeightConfig,
+            quantize_,
+        )
+        from torchao.quantization.granularity import PerGroup
         from torchao.utils import unwrap_tensor_subclass
 
-        quantize_(model, int8_dynamic_activation_int4_weight(group_size=group_size))
+        quantize_(
+            model,
+            Int8DynamicActivationIntxWeightConfig(
+                weight_dtype=torch.int4,
+                weight_granularity=PerGroup(group_size),
+            ),
+        )
+        model = unwrap_tensor_subclass(model)
 
         # TODO: deal with checkpoint / computation dtype decoupling.
@@ -744,9 +754,9 @@ def get_quant_embedding_transform(
     dtype_override: Optional[DType] = None,
 ):
     if embedding_quantize.startswith("torchao:"):
-        from torchao.experimental.quant_api import (
+        from torchao.prototype.quantization.embedding.api import (
             EmbeddingQuantizer,
-            SharedEmbeddingQuantizer,
+            TiedEmbeddingQuantizer,
         )
         from torchao.quantization.granularity import PerAxis, PerGroup
         from torchao.quantization.quant_api import MappingType
@@ -780,7 +790,7 @@ def _torchao_embedding_quantizer(model):
                 use_fallback=False,
             ).quantize(model)
         else:
-            SharedEmbeddingQuantizer(
+            TiedEmbeddingQuantizer(
                 weight_dtype=weight_dtype,
                 granularity=granularity,
                 mapping_type=mapping_type,
diff --git a/third-party/ao b/third-party/ao
index f1acc1e2ade..b99904b34c0 160000
--- a/third-party/ao
+++ b/third-party/ao
@@ -1 +1 @@
-Subproject commit f1acc1e2ade01fef0129a3cee62b3d8e14e22602
+Subproject commit b99904b34c0fd98f8a63ec57cbc1dc4993f74793

From 0bb9e5c4a508e39c2b85c24344fbb3fb237d0298 Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 10 Sep 2025 14:55:19 -0700
Subject: [PATCH 2/3] up

---
 backends/apple/coreml/test/test_coreml_recipes.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backends/apple/coreml/test/test_coreml_recipes.py b/backends/apple/coreml/test/test_coreml_recipes.py
index 313e24922d6..e66e800626f 100644
--- a/backends/apple/coreml/test/test_coreml_recipes.py
+++ b/backends/apple/coreml/test/test_coreml_recipes.py
@@ -3,6 +3,7 @@
 # Please refer to the license found in the LICENSE file in the root directory of the source tree.
 
+import copy
 import unittest
 
 import coremltools as ct
 
@@ -152,8 +153,9 @@ def forward(self, x):
         # Test with different group sizes
         for group_size in [8, 16, 32]:
             with self.subTest(group_size=group_size):
+                model_to_export = copy.deepcopy(model)
                 session = export(
-                    model=model,
+                    model=model_to_export,
                     example_inputs=example_inputs,
                     export_recipe=ExportRecipe.get_recipe(
                         CoreMLRecipeType.TORCHAO_INT4_WEIGHT_ONLY_PER_GROUP,

From 46cb5b67ef52a38efb01b95b44cbab921b2b64eb Mon Sep 17 00:00:00 2001
From: Scott Roy <161522778+metascroy@users.noreply.github.com>
Date: Wed, 10 Sep 2025 16:23:48 -0700
Subject: [PATCH 3/3] up

---
 backends/apple/coreml/test/test_coreml_recipes.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backends/apple/coreml/test/test_coreml_recipes.py b/backends/apple/coreml/test/test_coreml_recipes.py
index e66e800626f..303d8cb78ed 100644
--- a/backends/apple/coreml/test/test_coreml_recipes.py
+++ b/backends/apple/coreml/test/test_coreml_recipes.py
@@ -221,8 +221,9 @@ def forward(self, x):
         # Test with different group sizes
         for group_size in [16, 32, 64]:
             with self.subTest(group_size=group_size):
+                model_to_export = copy.deepcopy(model)
                 session = export(
-                    model=model,
+                    model=model_to_export,
                     example_inputs=example_inputs,
                     export_recipe=ExportRecipe.get_recipe(
                         CoreMLRecipeType.TORCHAO_INT8_WEIGHT_ONLY_PER_GROUP,
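
The three call sites touched by PATCH 1/3 all perform the same v1-to-v2 torchao migration, so it is worth seeing once in isolation. A minimal before/after sketch, using only the API the patch itself imports and assuming it is available at the pinned torchao commit:

    import torch
    import torch.nn as nn

    from torchao.quantization import Int8DynamicActivationIntxWeightConfig, quantize_
    from torchao.quantization.granularity import PerGroup
    from torchao.utils import unwrap_tensor_subclass

    model = nn.Sequential(nn.Linear(256, 256))

    # v1 API removed by this series:
    #   from torchao.quantization import int8_dynamic_activation_int4_weight
    #   quantize_(model, int8_dynamic_activation_int4_weight(group_size=32))

    # v2 API: int8 dynamic activation + int4 weight, quantized per group of 32.
    quantize_(
        model,
        Int8DynamicActivationIntxWeightConfig(
            weight_dtype=torch.int4,
            weight_granularity=PerGroup(32),
        ),
    )
    model = unwrap_tensor_subclass(model)

Per the quantize.py hunk above, passing PerAxis(0) instead of PerGroup(n) selects the channelwise variant used when group_size == 0.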
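PATCH 2/3 and PATCH 3/3 are a test-isolation fix rather than an API change: the deepcopy guards against export() mutating the module it receives, which would otherwise make every loop iteration after the first re-export an already-quantized model. A toy sketch of the hazard and the fix, with a hypothetical lower() standing in for export() and the CoreML recipe:

    import copy

    import torch.nn as nn

    def lower(model: nn.Module) -> nn.Module:
        # Stand-in for export(...): mutates the module it is given.
        for p in model.parameters():
            p.data = p.data.round()  # destructive, like in-place quantization
        return model

    base = nn.Linear(4, 4)
    for group_size in (8, 16, 32):
        # Fresh copy per iteration; without it, each pass would lower weights
        # already mutated by the previous one. In the real test, group_size
        # feeds the export recipe.
        model_to_export = copy.deepcopy(base)
        lowered = lower(model_to_export)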