backends/xnnpack/utils/quant_utils.py (0 additions, 3 deletions)

@@ -222,9 +222,6 @@ def extract_qdq_affine_op_args_for_decomposed_ops(node: torch.fx.Node):
 
     # add target_dtype_node after quant_min/quant_max
     args.append(target_dtype)
-    # zero_point_domain
-    if len(node.args) > 7 and node.args[7] != "INT":
-        return None, None
 
     if is_per_channel_group(node):
         block_sizes = cast(list[int], node.args[1])
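For context, a minimal sketch (an assumption, not taken from this diff) of why the guard above can be dropped: if the decomposed affine dequantize op no longer carries a zero_point_domain argument, index 7 of the node args shifts to the next argument, so the `!= "INT"` check would no longer test what it used to.

```python
import torch

# Assumed positional layout of a decomposed affine dequantize node BEFORE the
# zero_point_domain removal (illustrative values; only the check on args[7] is
# taken from the deleted code above):
OLD_ARGS = (
    "input", "block_size", "scale", "zero_point",
    torch.int8,     # input_dtype
    -128, 127,      # quant_min, quant_max
    "INT",          # zero_point_domain -- args[7], the value the deleted guard checked
    torch.float32,  # output_dtype
)

# AFTER the change the op drops zero_point_domain, so output_dtype moves to
# index 7 and `node.args[7] != "INT"` would reject otherwise valid nodes.
NEW_ARGS = OLD_ARGS[:7] + OLD_ARGS[8:]

assert OLD_ARGS[7] == "INT"
assert NEW_ARGS[7] is torch.float32
```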
exir/passes/_quant_patterns_and_replacements.py (0 additions, 3 deletions)

@@ -1017,7 +1017,6 @@ def embedding_byte_dtype_pattern(
             torch.int8,
             -128,
             127,
-            "INT",
             output_dtype,
         )
         return torch.ops.aten.embedding.default(dq, indices)

@@ -1062,7 +1061,6 @@ def embedding_2bit_dtype_pattern(
             torch.int8,
             -2,
             1,
-            "INT",
             output_dtype,
         )
         return torch.ops.aten.embedding.default(dq, indices)

@@ -1110,7 +1108,6 @@ def embedding_4bit_dtype_pattern(
             torch.int8,
             -8,
             7,
-            "INT",
             output_dtype,
         )
         return torch.ops.aten.embedding.default(dq, indices)
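For reference, a hedged reconstruction of how one of these patterns reads after the change. The op name and every argument before torch.int8 are placeholders assumed for illustration; only the trailing arguments (torch.int8, -128, 127, output_dtype) appear in the hunk above, and torchao must be imported so the custom ops are registered.

```python
import torch
import torchao  # noqa: F401  -- assumed dependency; registers torch.ops.torchao.*


# Hypothetical sketch of embedding_byte_dtype_pattern after this change:
# the "INT" zero_point_domain string is no longer passed to the dequantize op.
def embedding_byte_dtype_pattern(
    weight, block_size, scales, zero_points, indices, output_dtype
):
    dq = torch.ops.torchao.dequantize_affine.default(
        weight,
        block_size,
        scales,
        zero_points,
        torch.int8,
        -128,
        127,
        output_dtype,  # previously preceded by the "INT" zero_point_domain argument
    )
    return torch.ops.aten.embedding.default(dq, indices)
```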