diff --git a/.lintrunner.toml b/.lintrunner.toml
index d4cf2531ce1..5e7b4ff0951 100644
--- a/.lintrunner.toml
+++ b/.lintrunner.toml
@@ -390,7 +390,6 @@ exclude_patterns = [
     "backends/arm/test/ops/**",
     "backends/vulkan/quantizer/**",
     "backends/vulkan/test/**",
-    "backends/cadence/aot/quantizer/**",
     "backends/qualcomm/quantizer/**",
     "examples/qualcomm/**",
     "backends/xnnpack/quantizer/**",
diff --git a/backends/cadence/aot/compiler.py b/backends/cadence/aot/compiler.py
index a54954a3e3c..438f07ba15f 100644
--- a/backends/cadence/aot/compiler.py
+++ b/backends/cadence/aot/compiler.py
@@ -123,7 +123,7 @@ def prepare_and_convert_pt2(
     assert isinstance(model_gm, torch.fx.GraphModule)
 
     # Prepare
-    prepared_model = prepare_pt2e(model_gm, quantizer)  # pyre-ignore[6]
+    prepared_model = prepare_pt2e(model_gm, quantizer)
 
     # Calibrate
     # If no calibration data is provided, use the inputs
diff --git a/backends/cadence/aot/quantizer/TARGETS b/backends/cadence/aot/quantizer/TARGETS
index 75eab631dd4..c612a2e19fb 100644
--- a/backends/cadence/aot/quantizer/TARGETS
+++ b/backends/cadence/aot/quantizer/TARGETS
@@ -9,6 +9,7 @@ python_library(
     ],
     deps = [
         "//caffe2:torch",
+        "//pytorch/ao:torchao",
     ],
 )
 
@@ -34,7 +35,6 @@ python_library(
         ":patterns",
         ":utils",
         "//caffe2:torch",
-        "//executorch/backends/xnnpack/quantizer:xnnpack_quantizer_utils",
     ],
 )
 
diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py
index 66f6772d942..cd6a7287793 100644
--- a/backends/cadence/aot/quantizer/patterns.py
+++ b/backends/cadence/aot/quantizer/patterns.py
@@ -15,7 +15,7 @@
 
 from torch import fx
 from torch._ops import OpOverload
-from torch.ao.quantization.quantizer import (
+from torchao.quantization.pt2e.quantizer import (
     DerivedQuantizationSpec,
     SharedQuantizationSpec,
 )
diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py
index 761b2bf8d31..0cad34f7ffe 100644
--- a/backends/cadence/aot/quantizer/quantizer.py
+++ b/backends/cadence/aot/quantizer/quantizer.py
@@ -29,19 +29,20 @@
     is_annotated,
     no_outside_users,
 )
-from executorch.backends.xnnpack.quantizer.xnnpack_quantizer_utils import (
+
+from torch import fx
+
+from torchao.quantization.pt2e import HistogramObserver, MinMaxObserver
+from torchao.quantization.pt2e.quantizer import (
+    ComposableQuantizer,
+    DerivedQuantizationSpec,
     OperatorConfig,
     QuantizationAnnotation,
     QuantizationConfig,
     QuantizationSpec,
+    Quantizer,
 )
-
-from torch import fx
-
-from torch.ao.quantization.observer import HistogramObserver, MinMaxObserver
-from torch.ao.quantization.quantizer import DerivedQuantizationSpec, Quantizer
-from torch.ao.quantization.quantizer.composable_quantizer import ComposableQuantizer
 
 
 act_qspec_asym8s = QuantizationSpec(
     dtype=torch.int8,
diff --git a/backends/cadence/aot/quantizer/utils.py b/backends/cadence/aot/quantizer/utils.py
index 0f9c9399780..fad5ca41e22 100644
--- a/backends/cadence/aot/quantizer/utils.py
+++ b/backends/cadence/aot/quantizer/utils.py
@@ -14,13 +14,13 @@
 import torch
 from torch import fx
 from torch._ops import OpOverload
-from torch.ao.quantization import ObserverOrFakeQuantize
 from torch.fx import GraphModule
 from torch.fx.passes.utils.source_matcher_utils import (
     check_subgraphs_connected,
     SourcePartition,
 )
 
+from torchao.quantization.pt2e import ObserverOrFakeQuantize
 
 def quantize_tensor_multiplier(
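Note (not part of the patch): every hunk above is an import migration. Classes previously reached via `torch.ao.quantization.*` and the XNNPACK quantizer utils now come from their vendored homes under `torchao.quantization.pt2e.*`, with the Buck target picking up the new `//pytorch/ao:torchao` dependency. A minimal sketch of the migrated import surface, assuming `torchao` is installed; the observer choice and quantization range below are illustrative stand-ins, not values copied from the Cadence sources:

```python
import torch

# New torchao import paths exercised by this diff.
from torchao.quantization.pt2e import MinMaxObserver
from torchao.quantization.pt2e.quantizer import QuantizationSpec

# Shaped like the act_qspec_asym8s definition touched above: an asymmetric
# int8 activation spec. Observer and range here are illustrative only.
act_qspec_asym8s = QuantizationSpec(
    dtype=torch.int8,
    quant_min=-128,
    quant_max=127,
    qscheme=torch.per_tensor_affine,
    is_dynamic=False,
    observer_or_fake_quant_ctr=MinMaxObserver,
)

print(act_qspec_asym8s)
```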