diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index e5e83d3e255..b9186884d5e 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -54,7 +54,13 @@ ) from torch import fx from torch.ao.quantization.quantizer.utils import _annotate_output_qspec -from torchao.quantization.pt2e import HistogramObserver, MinMaxObserver +from torchao.quantization.pt2e import ( + FakeQuantize, + FusedMovingAvgObsFakeQuantize, + HistogramObserver, + MinMaxObserver, + MovingAverageMinMaxObserver, +) from torchao.quantization.pt2e.quantizer import ( ComposableQuantizer, DerivedQuantizationSpec, @@ -154,78 +160,120 @@ def get_supported_operators(cls) -> list[OperatorConfig]: # Quantization Specification used by Neutron NPU -act_qspec = QuantizationSpec( - dtype=torch.int8, - quant_min=-128, - quant_max=127, - qscheme=torch.per_tensor_affine, - is_dynamic=False, - observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12), -) - -wgt_qspec = QuantizationSpec( - dtype=torch.int8, - quant_min=-127, - quant_max=127, - qscheme=torch.per_tensor_symmetric, - is_dynamic=False, - observer_or_fake_quant_ctr=MinMaxObserver, - ch_axis=0, -) +def act_qspec(is_qat: bool): + eps = 2**-12 + observer_or_fake_quant_ctr = ( + FusedMovingAvgObsFakeQuantize.with_args( + observer=MovingAverageMinMaxObserver, eps=eps + ) + if is_qat + else HistogramObserver.with_args(eps=eps) + ) + + return QuantizationSpec( + dtype=torch.int8, + quant_min=-128, + quant_max=127, + qscheme=torch.per_tensor_affine, + is_dynamic=False, + observer_or_fake_quant_ctr=observer_or_fake_quant_ctr, + ) + + +def wgt_qspec(is_qat: bool): + observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAverageMinMaxObserver) + if is_qat + else MinMaxObserver + ) + + return QuantizationSpec( + dtype=torch.int8, + quant_min=-127, + quant_max=127, + qscheme=torch.per_tensor_symmetric, + is_dynamic=False, + observer_or_fake_quant_ctr=observer_or_fake_quant_ctr, + ch_axis=0, + ) + + +def wgt_fc_qspec(is_qat: bool): + observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAverageMinMaxObserver) + if is_qat + else MinMaxObserver + ) + + return QuantizationSpec( + dtype=torch.int8, + quant_min=-127, + quant_max=127, + qscheme=torch.per_tensor_symmetric, + is_dynamic=False, + observer_or_fake_quant_ctr=observer_or_fake_quant_ctr, + ) -wgt_fc_qspec = QuantizationSpec( - dtype=torch.int8, - quant_min=-127, - quant_max=127, - qscheme=torch.per_tensor_symmetric, - is_dynamic=False, - observer_or_fake_quant_ctr=MinMaxObserver, -) # Is set by the *PatternQuantizer directly. 
bias_qspec = None class NeutronQuantizer(ComposableQuantizer): - def __init__(self, neutron_target_spec: NeutronTargetSpec): + def __init__(self, neutron_target_spec: NeutronTargetSpec, is_qat: bool = False): self.neutron_target_spec = neutron_target_spec - static_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_qspec, None) - static_fc_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_fc_qspec, None) + self.is_qat = is_qat + + static_qconfig = QuantizationConfig( + act_qspec(is_qat=is_qat), + act_qspec(is_qat=is_qat), + wgt_qspec(is_qat=is_qat), + None, + ) + static_fc_qconfig = QuantizationConfig( + act_qspec(is_qat=is_qat), + act_qspec(is_qat=is_qat), + wgt_fc_qspec(is_qat=is_qat), + None, + ) + + OpQuantizer = NeutronAtenQuantizer super().__init__( [ - NeutronAtenQuantizer(AbsPattern(), static_qconfig), - NeutronAtenQuantizer(AdaptiveAvgPoolPattern(), static_qconfig), - NeutronAtenQuantizer(AddTensorPattern(), static_qconfig), - NeutronAtenQuantizer(AddmmPattern(self), static_fc_qconfig), - NeutronAtenQuantizer(AvgPoolPattern(), static_qconfig), - NeutronAtenQuantizer(CatPattern(), static_qconfig), - NeutronAtenQuantizer(Conv1dPattern(), static_qconfig), - NeutronAtenQuantizer(Conv2dPattern(self), static_qconfig), - NeutronAtenQuantizer(ConvTranspose2dPattern(), static_qconfig), - NeutronAtenQuantizer(DropoutPattern(), static_qconfig), - NeutronAtenQuantizer(FlattenPattern(), static_qconfig), - NeutronAtenQuantizer(HardTanhPattern(), static_qconfig), - NeutronAtenQuantizer(HardTanhInPlacePattern(), static_qconfig), - NeutronAtenQuantizer(LinearPattern(self), static_fc_qconfig), - NeutronAtenQuantizer(MaxPoolPattern(), static_qconfig), - NeutronAtenQuantizer(MeanDimPattern(), static_qconfig), - NeutronAtenQuantizer(MmPattern(self), static_qconfig), - NeutronAtenQuantizer(MulTensorPattern(), static_qconfig), - NeutronAtenQuantizer(PadPattern(), static_qconfig), - NeutronAtenQuantizer(PermutePattern(), static_qconfig), - NeutronAtenQuantizer(ReluPattern(), static_qconfig), - NeutronAtenQuantizer(ReluInPlacePattern(), static_qconfig), - NeutronAtenQuantizer(ReshapePattern(), static_qconfig), - NeutronAtenQuantizer(SigmoidPattern(), static_qconfig), - NeutronAtenQuantizer(SliceTensorPattern(), static_qconfig), - NeutronAtenQuantizer(SoftMaxPattern(), static_qconfig), - NeutronAtenQuantizer(SubTensorPattern(), static_qconfig), - NeutronAtenQuantizer(TanhPattern(), static_qconfig), - NeutronAtenQuantizer(TanhInPlacePattern(), static_qconfig), - NeutronAtenQuantizer(TransposeIntPattern(), static_qconfig), - NeutronAtenQuantizer(ViewPattern(), static_qconfig), + OpQuantizer(AbsPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(AdaptiveAvgPoolPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(AddTensorPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(AddmmPattern(self, is_qat=is_qat), static_fc_qconfig), + OpQuantizer(AvgPoolPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(CatPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(Conv1dPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(Conv2dPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ConvTranspose2dPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(DropoutPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(FlattenPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(HardTanhPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(HardTanhInPlacePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(LinearPattern(self, is_qat=is_qat), static_fc_qconfig), + 
OpQuantizer(MaxPoolPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MeanDimPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MmPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(MulTensorPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(PadPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(PermutePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(ReluPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(ReluInPlacePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(ReshapePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(SigmoidPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(SliceTensorPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(SoftMaxPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(SubTensorPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(TanhPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(TanhInPlacePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(TransposeIntPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(ViewPattern(is_qat=is_qat), static_qconfig), ] ) + # Mapping ops defined in quantizer partition types to its quantizer self.op_to_quantizer = { pt: q for q in self.quantizers for pt in q.pattern.partition_types() @@ -235,7 +283,9 @@ def __init__(self, neutron_target_spec: NeutronTargetSpec): pt: False for q in self.quantizers for pt in q.pattern.partition_types() } self.cluster_quantizers = [ - NeutronAtenQuantizer(ActivationsConcatClusterPattern(self), static_qconfig) + NeutronAtenQuantizer( + ActivationsConcatClusterPattern(self, is_qat=is_qat), static_qconfig + ) ] def transform_for_annotation( @@ -288,7 +338,7 @@ def _annotate_inputs(self, model: fx.GraphModule): continue if node.op == "placeholder" and len(node.users) > 0: - _annotate_output_qspec(node, act_qspec) + _annotate_output_qspec(node, act_qspec(self.is_qat)) self._mark_input_node_as_annotated(node) def validate(self, model: torch.fx.GraphModule) -> None: diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 3bf0afa3a7d..e8f247d4bbc 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -14,7 +14,11 @@ from torch import fx from torch._ops import OpOverload from torch.fx import Node -from torchao.quantization.pt2e import PerChannelMinMaxObserver +from torchao.quantization.pt2e import ( + FakeQuantize, + MovingAveragePerChannelMinMaxObserver, + PerChannelMinMaxObserver, +) from torchao.quantization.pt2e.quantizer import ( DerivedQuantizationSpec, FixedQParamsQuantizationSpec, @@ -59,7 +63,8 @@ class PartitionAnchors: | tuple[fx.Node, NodeArgsIdx, SharedQuantizationSpec], ] = field(default_factory=list) weights: list[ - tuple[fx.Node, NodeArgsIdx] | tuple[fx.Node, NodeArgsIdx, QuantizationSpec], + tuple[fx.Node, NodeArgsIdx] + | tuple[fx.Node, NodeArgsIdx, QuantizationSpec | FakeQuantize], ] = field(default_factory=list) biases: list[ tuple[fx.Node, NodeArgsIdx] @@ -69,12 +74,18 @@ class PartitionAnchors: literals: list[tuple[fx.Node, NodeArgsIdx]] = field(default_factory=list) output: list[ tuple[fx.Node] - | tuple[fx.Node, FixedQParamsQuantizationSpec | SharedQuantizationSpec], + | tuple[ + fx.Node, + FixedQParamsQuantizationSpec | SharedQuantizationSpec, + ], ] = field(default_factory=list) empty: bool = False class QuantizationPattern(ABC): + def __init__(self, is_qat: bool = False): + self.is_qat = is_qat + @abstractmethod def partition_types(self) -> list[OpOverload]: """ @@ -148,11 +159,12 @@ def get_anchors_for_fixed_quant_specs( zero_point: int, 
quant_min: int = -128, quant_max: int = 127, + is_qat: bool = False, ) -> PartitionAnchors: node = fused_partition[0].nodes[-1] assert len(fused_partition[0].input_nodes) == 1 - qspec = FixedQParamsQuantizationSpec( + qspec_or_fake_quantize = FixedQParamsQuantizationSpec( dtype=torch.int8, scale=scale, zero_point=zero_point, @@ -166,7 +178,7 @@ def get_anchors_for_fixed_quant_specs( weights=[], biases=[], output=[ - (node, qspec), + (node, qspec_or_fake_quantize), ], ) @@ -190,7 +202,9 @@ def partition_types(self): class AddmmPattern(QuantizationPattern): - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info @@ -365,7 +379,11 @@ def get_anchors( ch_axis=0, ) - weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver + weight_observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver) + if self.is_qat + else PerChannelMinMaxObserver + ) weight_quantization_spec = QuantizationSpec( dtype=torch.int8, observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, @@ -399,7 +417,9 @@ def partition_types(self) -> list[OpOverload]: class Conv2dPattern(ConvPattern): - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool = False): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info @@ -426,7 +446,11 @@ def get_anchors( ch_axis=0, ) - weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver + weight_observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver) + if self.is_qat + else PerChannelMinMaxObserver + ) weight_quantization_spec = QuantizationSpec( dtype=torch.int8, observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, @@ -563,7 +587,9 @@ def replacement_op(self): class LinearPattern(QuantizationPattern): - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool = False): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info @@ -637,7 +663,9 @@ def partition_types(self): class MmPattern(QuantizationPattern): - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool = False): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info @@ -802,7 +830,7 @@ def get_anchors( self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors: return get_anchors_for_fixed_quant_specs( - fused_partition, scale=1.0 / 256.0, zero_point=-128 + fused_partition, scale=1.0 / 256.0, zero_point=-128, is_qat=self.is_qat ) @@ -820,7 +848,7 @@ def get_anchors( self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors: return get_anchors_for_fixed_quant_specs( - fused_partition, scale=1.0 / 256.0, zero_point=-128 + fused_partition, scale=1.0 / 256.0, zero_point=-128, is_qat=self.is_qat ) @@ -838,7 +866,7 @@ def get_anchors( self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors: return get_anchors_for_fixed_quant_specs( - fused_partition, scale=1.0 / 128.0, zero_point=0 + 
fused_partition, scale=1.0 / 128.0, zero_point=0, is_qat=self.is_qat ) @@ -856,7 +884,7 @@ def get_anchors( self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors: return get_anchors_for_fixed_quant_specs( - fused_partition, scale=1.0 / 128.0, zero_point=0 + fused_partition, scale=1.0 / 128.0, zero_point=0, is_qat=self.is_qat ) @@ -884,7 +912,9 @@ class ActivationsConcatClusterPattern(QuantizationPattern): │ """ - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool = False): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info diff --git a/backends/nxp/quantizer/utils.py b/backends/nxp/quantizer/utils.py index 389526111cb..6dc58e8114a 100644 --- a/backends/nxp/quantizer/utils.py +++ b/backends/nxp/quantizer/utils.py @@ -15,13 +15,18 @@ import torch from torch import fx from torch._ops import OpOverload +from torch.ao.quantization import move_exported_model_to_eval from torch.export import ExportedProgram from torch.fx.passes.utils.source_matcher_utils import ( check_subgraphs_connected, SourcePartition, ) from torchao.quantization.pt2e import ObserverOrFakeQuantize -from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e +from torchao.quantization.pt2e.quantize_pt2e import ( + convert_pt2e, + prepare_pt2e, + prepare_qat_pt2e, +) from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY, Quantizer @@ -154,10 +159,11 @@ def find_sequential_partitions_aten( return fused_partitions -def post_training_quantize( +def calibrate_and_quantize( model: ExportedProgram | fx.GraphModule, calibration_inputs: Iterable[tuple[torch.Tensor, ...]], quantizer: Quantizer, + is_qat: bool = False, ) -> fx.GraphModule: """Quantize the provided model. @@ -165,6 +171,8 @@ def post_training_quantize( :param calibration_inputs: Either a tuple of calibration input tensors where each element corresponds to a model input. Or an iterator over such tuples. :param quantizer: Quantizer to use. + :param is_qat: Whether quantization is done using Quantization Aware Training (QAT) or not. + Note: In QAT mode, training is not performed. Only calibration (in eval mode) is done. :return: Quantized GraphModule. 
""" @@ -172,7 +180,12 @@ def post_training_quantize( if isinstance(model, ExportedProgram): model = model.module() - m = prepare_pt2e(model, quantizer) + if is_qat: + m = prepare_qat_pt2e(model, quantizer) + m = move_exported_model_to_eval(m) + else: + m = prepare_pt2e(model, quantizer) + for data in calibration_inputs: m(*data) m = convert_pt2e(m) diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index 76cfd5fa24b..61af7b5c67f 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -26,7 +26,7 @@ from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.exir import ( EdgeCompileConfig, EdgeProgramManager, @@ -59,8 +59,8 @@ def get_random_calibration_inputs( ] -def _get_default_quantizer(target_spec: NeutronTargetSpec) -> Quantizer: - return NeutronQuantizer(target_spec) +def _get_default_quantizer(target_spec: NeutronTargetSpec, use_qat: bool) -> Quantizer: + return NeutronQuantizer(target_spec, is_qat=use_qat) def to_model_input_spec( @@ -93,6 +93,7 @@ def to_quantized_edge_program( ] = get_random_calibration_inputs, target="imxrt700", neutron_converter_flavor=neutron_converter_flavor, + use_qat=False, remove_quant_io_ops=False, custom_delegation_options=CustomDelegationOptions(), # noqa B008 get_quantizer_fn=None, @@ -100,7 +101,9 @@ def to_quantized_edge_program( ) -> EdgeProgramManager: _neutron_target_spec = NeutronTargetSpec(target, neutron_converter_flavor) if get_quantizer_fn is None: - get_quantizer_fn = partial(_get_default_quantizer, _neutron_target_spec) + get_quantizer_fn = partial( + _get_default_quantizer, _neutron_target_spec, use_qat + ) calibration_inputs = get_calibration_inputs_fn(to_model_input_spec(input_spec)) example_input = calibration_inputs[0] @@ -110,10 +113,11 @@ def to_quantized_edge_program( exir_program_aten = torch.export.export(model, example_input, strict=True) - exir_program_aten__module_quant = post_training_quantize( - exir_program_aten, - calibration_inputs, - get_quantizer_fn(), + exir_program_aten__module_quant = calibrate_and_quantize( + model=exir_program_aten, + calibration_inputs=calibration_inputs, + quantizer=get_quantizer_fn(), + is_qat=use_qat, ) compile_spec = generate_neutron_compile_spec( @@ -150,11 +154,13 @@ def to_quantized_edge_program( def to_quantized_executorch_program( model: torch.nn.Module, input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] 
| list[tuple[int, ...]], + use_qat: bool = False, use_neutron_for_format_conversion: bool = True, ) -> ExecutorchProgramManager: edge_program_manager = to_quantized_edge_program( model, input_spec, + use_qat=use_qat, use_neutron_for_format_conversion=use_neutron_for_format_conversion, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py index 96b9abfe117..2e9a1b393ff 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py @@ -20,6 +20,7 @@ from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -63,13 +64,13 @@ def forward(self, x): return x.abs() -def test_conv_abs(mocker, input_shape: tuple[int] = (1, 3, 112, 112)): +def test_conv_abs(mocker, use_qat, input_shape: tuple[int] = (1, 3, 112, 112)): model = ConvBlocksWithAbs(conv_in_channels=input_shape[1]) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return diff --git a/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py index a80d2014487..db5cbdcbb5e 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py @@ -16,6 +16,7 @@ AdaptiveAvgPool2dConvModule, ) from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -40,7 +41,7 @@ def reseed_model_per_test_run(): ], ) def test_adaptive_avg_pool_2d_delegated_quant_conversion( - mocker, input_shape, output_size + mocker, input_shape, output_size, use_qat ): model = AdaptiveAvgPool2dConvModule(output_size) @@ -48,7 +49,7 @@ def test_adaptive_avg_pool_2d_delegated_quant_conversion( # Run conversion edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() nodes = [str(node) for node in edge_program.graph.nodes] @@ -86,7 +87,7 @@ def test_adaptive_avg_pool_2d_delegated_quant_conversion( ], ) def test_adaptive_avg_pool_2d_non_delegated_quant_conversion( - mocker, input_shape, output_size + mocker, input_shape, output_size, use_qat ): model = AdaptiveAvgPool2dConvModule(output_size) @@ -94,7 +95,7 @@ def test_adaptive_avg_pool_2d_non_delegated_quant_conversion( # Run conversion edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() nodes = list(edge_program.graph.nodes) @@ -119,7 +120,7 @@ def test_adaptive_avg_pool_2d_non_delegated_quant_conversion( ) -def test_adaptive_avg_pool_2d_mean_dim_quant_conversion(mocker): +def test_adaptive_avg_pool_2d_mean_dim_quant_conversion(mocker, use_qat): input_shape = (1, 4, 16, 16) model = 
AdaptiveAvgPool2dConvMeanDimModule() @@ -127,7 +128,7 @@ def test_adaptive_avg_pool_2d_mean_dim_quant_conversion(mocker): # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture generated model diff --git a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py index 02e799723d4..1aa58ab5d95 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py @@ -21,6 +21,7 @@ AddTensorOneInputModule, ) from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -38,13 +39,13 @@ def reseed_model_per_test_run(): pytest.param((1, 4, 8, 8), id="4D."), ], ) -def test_add_tensor_quant_conversion(mocker, input_shape): +def test_add_tensor_quant_conversion(mocker, input_shape, use_qat): model = AddTensorModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(model, [input_shape, input_shape]) + _ = to_quantized_edge_program(model, [input_shape, input_shape], use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -69,13 +70,13 @@ def test_add_tensor_quant_conversion(mocker, input_shape): pytest.param((1, 4, 8, 8), id="4D."), ], ) -def test_add_tensor_one_input_quant_conversion(mocker, input_shape): +def test_add_tensor_one_input_quant_conversion(mocker, input_shape, use_qat): model = AddTensorOneInputModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -97,14 +98,14 @@ def test_add_tensor_one_input_quant_conversion(mocker, input_shape): pytest.param((1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."), ], ) -def test_add_tensor_w_conv_quant_conversion(mocker, input_shape): +def test_add_tensor_w_conv_quant_conversion(mocker, input_shape, use_qat): model = AddTensorConvModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture generated model @@ -137,13 +138,13 @@ def test_add_tensor_w_conv_quant_conversion(mocker, input_shape): ], ) def test_add_tensor_broadcasting_unsupported_quant_conversion( - x_input_shape, y_input_shape + x_input_shape, y_input_shape, use_qat ): model = AddTensorModule() # Run conversion edge_program = to_quantized_edge_program( - model, [x_input_shape, y_input_shape] + model, [x_input_shape, y_input_shape], use_qat=use_qat ).exported_program() nodes = list(edge_program.graph.nodes) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py index a6f5ef8c93b..a8cdee41830 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py @@ -19,6 +19,7 @@ ) from 
executorch.backends.nxp.tests.models import AddmmModule, LinearModule from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized from torch.export import ExportedProgram @@ -28,7 +29,8 @@ def setUpClass(cls): torch.manual_seed(23) np.random.seed(42) - def test_addmm_conversion(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_addmm_conversion(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -38,7 +40,7 @@ def test_addmm_conversion(self): model = AddmmModule(input_shape[1]) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. @@ -60,7 +62,8 @@ def test_addmm_conversion(self): tfl_model=tflite_flatbuffers_model, ) - def test_linear_conversion__with_bias(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_linear_conversion__with_bias(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -70,7 +73,7 @@ def test_linear_conversion__with_bias(self): model = LinearModule(bias=True) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. diff --git a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py index 7aed0236043..b6083d1e816 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py @@ -28,6 +28,7 @@ ) from executorch.backends.nxp.tests.models import AvgPool2dConvModule, AvgPool2dModule from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -143,14 +144,16 @@ def test_avg_pool_2d_conversion(input_shape, padding, count_include_pad): ), ], ) -def test_avg_pool_2d_quant_conversion(mocker, input_shape, padding, count_include_pad): +def test_avg_pool_2d_quant_conversion( + mocker, input_shape, padding, count_include_pad, use_qat +): model = AvgPool2dConvModule(padding=padding, count_include_pad=count_include_pad) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture generated model @@ -170,7 +173,7 @@ def test_avg_pool_2d_quant_conversion(mocker, input_shape, padding, count_includ ) -def test_avg_pool_2d_quant_conversion__padded(mocker): +def test_avg_pool_2d_quant_conversion__padded(mocker, use_qat): input_shape = (1, 8, 8, 8) model = AvgPool2dModule(True, 1) @@ -179,7 +182,7 @@ def test_avg_pool_2d_quant_conversion__padded(mocker): # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture the converter operators. 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py index 590b0be6a6b..e3ee2fff90b 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py @@ -22,6 +22,7 @@ ) from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 def _normalized_dim(dim, rank): @@ -84,13 +85,13 @@ def forward(self, *inputs: torch.Tensor): pytest.param(4, 5, -3, id="4D, 5 inputs, dim=-3"), ], ) -def test_cat__same_shapes(dim, num_inputs, rank, mocker): +def test_cat__same_shapes(dim, num_inputs, rank, mocker, use_qat): input_shape = tuple([8, 8, 8, 8][:rank]) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - CatModule(dim), [input_shape] * num_inputs + CatModule(dim), [input_shape] * num_inputs, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -115,13 +116,13 @@ def test_cat__same_shapes(dim, num_inputs, rank, mocker): @pytest.mark.parametrize("dim", [3, -2, -3]) @pytest.mark.parametrize("num_inputs", [2, 5]) -def test_cat__channels_first__same_shapes(dim, num_inputs, mocker): +def test_cat__channels_first__same_shapes(dim, num_inputs, mocker, use_qat): input_shape = (2, 8, 6, 8) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") channels = input_shape[1] if dim not in {1, -3} else input_shape[1] * num_inputs quantized_program = to_quantized_edge_program( - CatConvModule(dim, channels), [input_shape] * num_inputs + CatConvModule(dim, channels), [input_shape] * num_inputs, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -158,13 +159,13 @@ def test_cat__channels_first__same_shapes(dim, num_inputs, mocker): pytest.param(-2, (1, 1, 1, 8), id="axis = -2"), ], ) -def test_cat__unsupported__imxrt700(dim, input_shape): +def test_cat__unsupported__imxrt700(dim, input_shape, use_qat): """This test is conjoined with the one below (`test_cat__context_dependent__imxrt700`). In this case, the inputs of the `cat` are NOT compute ops, so the `cat` is NOT delegated. """ num_inputs = 2 quantized_program = to_quantized_edge_program( - CatModule(dim), [input_shape] * num_inputs, target="imxrt700" + CatModule(dim), [input_shape] * num_inputs, target="imxrt700", use_qat=use_qat ).exported_program() # Make sure the `Cat` was NOT delegated. @@ -188,13 +189,16 @@ def test_cat__unsupported__imxrt700(dim, input_shape): pytest.param(-2, (1, 1, 1, 8), id="axis = -2"), ], ) -def test_cat__context_dependent__imxrt700(dim, input_shape): +def test_cat__context_dependent__imxrt700(dim, input_shape, use_qat): """This test is conjoined with the one above (`test_cat__unsupported__imxrt700`). In this case, the inputs of the `cat` are compute ops, so the `cat` is delegated. """ num_inputs = 2 ep = to_quantized_edge_program( - AddCatModule(dim), [input_shape] * num_inputs, target="imxrt700" + AddCatModule(dim), + [input_shape] * num_inputs, + target="imxrt700", + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was delegated. 
@@ -218,7 +222,7 @@ def test_cat__context_dependent__imxrt700(dim, input_shape): pytest.param(4, 5, -3, id="4D, 5 inputs, dim=-3"), ], ) -def test_cat__different_shapes(dim, num_inputs, rank, mocker): +def test_cat__different_shapes(dim, num_inputs, rank, mocker, use_qat): input_shape = tuple([2, 8, 8, 8, 8][-rank:]) # The shape of every input will be different along the concatenated dimension. @@ -231,7 +235,7 @@ def test_cat__different_shapes(dim, num_inputs, rank, mocker): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - CatModule(dim), input_shapes + CatModule(dim), input_shapes, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -258,7 +262,7 @@ def test_cat__different_shapes(dim, num_inputs, rank, mocker): @pytest.mark.parametrize( "num_inputs", [2, 5], ids=lambda num_inputs: f"num_inputs = {num_inputs}" ) -def test_cat__channels_first__different_shapes(dim, num_inputs, mocker): +def test_cat__channels_first__different_shapes(dim, num_inputs, mocker, use_qat): input_shape = (2, 8, 6, 8) # The shape of every input will be different along the concatenated dimension. @@ -276,7 +280,7 @@ def test_cat__channels_first__different_shapes(dim, num_inputs, mocker): sum(shape[1] for shape in input_shapes) if dim in [1, -3] else input_shape[1] ) quantized_program = to_quantized_edge_program( - CatConvModule(dim, channels), input_shapes + CatConvModule(dim, channels), input_shapes, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -301,7 +305,7 @@ def test_cat__channels_first__different_shapes(dim, num_inputs, mocker): ) -def test_cat__different_shapes__unsupported_channels__imxrt700(): +def test_cat__different_shapes__unsupported_channels__imxrt700(use_qat): input_shape = (2, 4, 6, 7) # (channels % 8) != 0 num_inputs = 2 @@ -315,7 +319,7 @@ def test_cat__different_shapes__unsupported_channels__imxrt700(): input_shapes.append(tuple(tmp_shape)) quantized_program = to_quantized_edge_program( - CatModule(dim), input_shapes, target="imxrt700" + CatModule(dim), input_shapes, target="imxrt700", use_qat=use_qat ).exported_program() # Make sure the `Cat` was NOT delegated. @@ -327,7 +331,7 @@ def test_cat__different_shapes__unsupported_channels__imxrt700(): ) -def test_cat__force_delegate(): +def test_cat__force_delegate(use_qat): target = "imxrt700" # The Partitioner doesn't know if the `8` or the `1` will become the channels in the IR. Therefore, it would @@ -339,6 +343,7 @@ def test_cat__force_delegate(): [input_shape, input_shape], target=target, custom_delegation_options=CustomDelegationOptions(force_delegate_cat=True), + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was delegated. @@ -348,7 +353,7 @@ def test_cat__force_delegate(): assert any("lowered_module" in node.name for node in quantized_program.graph.nodes) -def test_cat__same_shapes_converter_padding_last_dimension(): +def test_cat__same_shapes_converter_padding_last_dimension(use_qat): target = "imxrt700" # The Converter is capable of padding the last dimension of `cat` with the same input shapes. @@ -360,6 +365,7 @@ def test_cat__same_shapes_converter_padding_last_dimension(): target=target, neutron_converter_flavor="SDK_25_09", custom_delegation_options=CustomDelegationOptions(), + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was delegated. 
@@ -369,7 +375,7 @@ def test_cat__same_shapes_converter_padding_last_dimension(): assert any("lowered_module" in node.name for node in quantized_program.graph.nodes) -def test_cat__same_shapes__channels_first__padding_channels(): +def test_cat__same_shapes__channels_first__padding_channels(use_qat): target = "imxrt700" # The Converter is capable of padding the last dimension of `cat` with the same input shapes. @@ -381,6 +387,7 @@ def test_cat__same_shapes__channels_first__padding_channels(): target=target, neutron_converter_flavor="SDK_25_09", custom_delegation_options=CustomDelegationOptions(), + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was delegated. @@ -390,7 +397,7 @@ def test_cat__same_shapes__channels_first__padding_channels(): assert any("lowered_module" in node.name for node in quantized_program.graph.nodes) -def test_cat__same_shapes_converter_padding_middle_dimension(): +def test_cat__same_shapes_converter_padding_middle_dimension(use_qat): target = "imxrt700" # The Converter is not capable of padding the middle dimensions of `cat` with the same input shapes. @@ -401,6 +408,7 @@ def test_cat__same_shapes_converter_padding_middle_dimension(): [input_shape, input_shape], target=target, custom_delegation_options=CustomDelegationOptions(), + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was NOT delegated. @@ -412,7 +420,7 @@ def test_cat__same_shapes_converter_padding_middle_dimension(): ) -def test_cat__format_specific_support__formatless(mocker): +def test_cat__format_specific_support__formatless(mocker, use_qat): # The last dim will end up being the channels, as the format is `formatless`. # Only the last dim satisfies the Neutron requirements for the channels. input_shape = (3, 3, 3, 8) @@ -424,7 +432,7 @@ def test_cat__format_specific_support__formatless(mocker): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - CatModule(dim), input_shapes + CatModule(dim), input_shapes, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -447,7 +455,7 @@ def test_cat__format_specific_support__formatless(mocker): ) -def test_cat__format_specific_support__channels_first(mocker): +def test_cat__format_specific_support__channels_first(mocker, use_qat): # The second dim will end up being the channels, as the format is `formatless`. # Only the second dim satisfies the Neutron requirements for the channels. input_shape = (3, 8, 3, 3) @@ -462,7 +470,7 @@ def test_cat__format_specific_support__channels_first(mocker): sum(shape[1] for shape in input_shapes) if dim in [1, -3] else input_shape[1] ) quantized_program = to_quantized_edge_program( - CatConvModule(dim, channels), input_shapes + CatConvModule(dim, channels), input_shapes, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py index 427ddaf14a5..250ddb88212 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py @@ -100,9 +100,15 @@ def target_can_be_clone(node): return node in clone_ops or target_can_be_clone(node) @parameterized.expand( - list(itertools.product([True, False], [(1, 3, 128, 128), (1, 3, 256, 256)])) + list( + itertools.product( + [True, False], [(1, 3, 128, 128), (1, 3, 256, 256)], [True, False] + ) + ) ) - def test_conv_dropout_quant(self, inplace_dropout: bool, input_shape: tuple[int]): + def test_conv_dropout_quant( + self, inplace_dropout: bool, input_shape: tuple[int], use_qat: bool + ): model = SingleConvBlockWithDropout( conv_in_channels=input_shape[1], perform_inplace_dropout=inplace_dropout ).eval() @@ -113,7 +119,10 @@ def test_conv_dropout_quant(self, inplace_dropout: bool, input_shape: tuple[int] owner=EdgeProgramToIRConverter, ) as converter_spy: quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ).exported_program() tflite_flatbuffers_model, _ = converter_spy.calls[-1].return_value @@ -157,7 +166,10 @@ def test_conv_dropout_no_quant( # Clone with inplace=True should not produce clone edge op and vice versa assert inplace_dropout ^ has_clone - def test_clone_pool_view_copy_quant(self, input_shape: tuple[int] = (1, 64, 25, 5)): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_clone_pool_view_copy_quant( + self, _, use_qat: bool, input_shape: tuple[int] = (1, 64, 25, 5) + ): model = KWSFinalBlock(input_shape).eval() with kgb.spy_on( @@ -166,7 +178,7 @@ def test_clone_pool_view_copy_quant(self, input_shape: tuple[int] = (1, 64, 25, owner=EdgeProgramToIRConverter, ) as converter_spy: quantized_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() tflite_flatbuffers_model, _ = converter_spy.calls[-1].return_value diff --git a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py index bd1f894001c..a2c9526a508 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py @@ -22,6 +22,7 @@ ConstantPadNDConvModule, ConstantPadNDModule, ) +from executorch.backends.nxp.tests.use_qat import * # noqa F403 from executorch.exir.dialects._ops import ops as exir_ops @@ -120,20 +121,24 @@ def test_constant_pad_nd_conversion__channels_first(input_shape, paddings): pytest.param((1, 1, 6, 8), (1, 2, 3, 4, 2, 1), id="4D, padding C, H, W"), ], ) -def test_constant_pad_nd__unsupported_paddings(input_shape, paddings): +def test_constant_pad_nd__unsupported_paddings(input_shape, paddings, use_qat): model = ConstantPadNDModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(exec_program.graph.nodes) # There is at least one non-delegated Pad node assert any(node.name == "aten_constant_pad_nd_default" for node in nodes) -def 
test_constant_pad_nd__delegation__formatless__supported_padding(): +def test_constant_pad_nd__delegation__formatless__supported_padding(use_qat): input_shape = (2, 4, 6, 8) # Formatless -> the last dim (8) will be padded. paddings = [0, 0, 1, 2, 3, 4] # The last dim is padded using the first 2 paddings. model = ConstantPadNDModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `pad` was delegated. assert not graph_contains_any_of_ops( @@ -141,11 +146,13 @@ def test_constant_pad_nd__delegation__formatless__supported_padding(): ) -def test_constant_pad_nd__delegation__formatless__unsupported_padding(): +def test_constant_pad_nd__delegation__formatless__unsupported_padding(use_qat): input_shape = (2, 4, 6, 8) # Formatless -> the last dim (8) will be padded. paddings = [0, 1] # The last dim is padded using the first 2 paddings. model = ConstantPadNDModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `pad` was NOT delegated. assert graph_contains_any_of_ops( @@ -153,11 +160,13 @@ def test_constant_pad_nd__delegation__formatless__unsupported_padding(): ) -def test_constant_pad_nd__delegation__channels_first__supported_padding(): +def test_constant_pad_nd__delegation__channels_first__supported_padding(use_qat): input_shape = (2, 4, 6, 8) # Channels first -> the second dim (4) will be padded. paddings = [1, 2, 3, 4, 0, 0] # The second dim is padded using the paddings[4:6]. model = ConstantPadNDConvModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `pad` was delegated. assert not graph_contains_any_of_ops( @@ -165,11 +174,13 @@ def test_constant_pad_nd__delegation__channels_first__supported_padding(): ) -def test_constant_pad_nd__delegation__channels_first__unsupported_padding(): +def test_constant_pad_nd__delegation__channels_first__unsupported_padding(use_qat): input_shape = (2, 3, 6, 8) # Channels first -> the second dim (3) will be padded. paddings = [0, 0, 0, 0, 1, 0] # The second dim is padded using the paddings[4:6]. model = ConstantPadNDConvModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `pad` was NOT delegated. 
assert graph_contains_any_of_ops( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py index 0fabbf615c9..56fdf1a2e0c 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py @@ -30,6 +30,7 @@ from executorch.backends.nxp.tests.models import Conv1dModule, Conv2dModule from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -42,7 +43,7 @@ def reseed_model_per_test_run(): @pytest.mark.parametrize("stride", [1, 2]) @pytest.mark.parametrize("dilation", [2, 1]) @pytest.mark.parametrize("kernel_size", [(1,), (3,)]) -def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker): +def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker, use_qat): input_shape = (1, 4, 16) model = Conv1dModule( bias=bias, stride=stride, dilation=dilation, kernel_size=kernel_size @@ -51,7 +52,7 @@ def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker): ops_spy = mocker.spy(ModelBuilder, "finish") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -96,7 +97,7 @@ def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker): ) @pytest.mark.parametrize("padding", [(1,), 2]) def test_conv1d_quant_conversion__padded( - stride, dilation, kernel_size, padding, mocker + stride, dilation, kernel_size, padding, mocker, use_qat ): input_shape = (1, 4, 16) model = Conv1dModule( @@ -106,7 +107,7 @@ def test_conv1d_quant_conversion__padded( ops_spy = mocker.spy(ModelBuilder, "finish") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -153,7 +154,7 @@ def test_conv1d_quant_conversion__padded( @pytest.mark.parametrize("dilation", [2, 1]) @pytest.mark.parametrize("kernel_size", [(1,), (3,)]) def test_conv1d_quant_conversion__depthwise( - bias, stride, dilation, kernel_size, mocker + bias, stride, dilation, kernel_size, mocker, use_qat ): input_shape = (1, 4, 16) group = input_shape[1] @@ -170,7 +171,7 @@ def test_conv1d_quant_conversion__depthwise( ops_spy = mocker.spy(ModelBuilder, "finish") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -214,7 +215,7 @@ def test_conv1d_quant_conversion__depthwise( ) @pytest.mark.parametrize("padding", [(1,), 2]) def test_conv1d_quant_conversion__depthwise__padded( - stride, dilation, kernel_size, padding, mocker + stride, dilation, kernel_size, padding, mocker, use_qat ): input_shape = (1, 4, 16) group = input_shape[1] @@ -231,7 +232,7 @@ def test_conv1d_quant_conversion__depthwise__padded( ops_spy = mocker.spy(ModelBuilder, "finish") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = 
converter_spy.spy_return @@ -401,12 +402,12 @@ def test_conv1d_quant_conversion__depthwise__padded( ), ], ) -def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape): +def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture generated model @@ -432,7 +433,7 @@ def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape): @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.parametrize("kernel_shape", [[1, 2], [3, 3], [4, 1]]) def test_conv2d_conversion__depthwise__quantized( - bias, stride, dilation, kernel_shape, mocker + bias, stride, dilation, kernel_shape, mocker, use_qat ): input_shape = (1, 4, 12, 12) group = input_shape[1] @@ -449,6 +450,7 @@ def test_conv2d_conversion__depthwise__quantized( kernel_size=kernel_shape, ), tuple(input_shape), + use_qat=use_qat, use_neutron_for_format_conversion=False, ).exported_program() @@ -497,7 +499,7 @@ def test_conv2d_conversion__depthwise__padded(padding, mocker): @pytest.mark.parametrize("padding", [1, 2]) -def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker): +def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker, use_qat): input_shape = (1, 4, 12, 12) group = input_shape[1] spy = mocker.spy(ModelBuilder, "finish") @@ -507,6 +509,7 @@ def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker): group=group, in_channels=group, out_channels=group, padding=padding ), tuple(input_shape), + use_qat=use_qat, use_neutron_for_format_conversion=False, ).exported_program() @@ -580,12 +583,12 @@ def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker): ], ) def test_conv_transpose2d_conversion__quantized( - mocker, model: torch.nn.Module, input_shape + mocker, model: torch.nn.Module, input_shape, use_qat ): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() # Make sure the `TransposeConv` was delegated. 
@@ -664,9 +667,11 @@ def test_conv_transpose2d_conversion__quantized( ], ) def test_conv_transpose2d_non_delegated_conversion__quantized( - model: torch.nn.Module, input_shape + model: torch.nn.Module, input_shape, use_qat ): - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(edge_program.graph.nodes) assert len(nodes) == 15 diff --git a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py index dad8ce6a0e3..fb272a2c650 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py @@ -23,6 +23,7 @@ from executorch.backends.nxp.tests.models import Conv2dWithActivation from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -33,7 +34,7 @@ def reseed_model_per_test_run(): @pytest.mark.parametrize("input_shape", [(1, 3, 128, 128)]) @pytest.mark.parametrize("inplace", [True, False]) -def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool): +def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool, use_qat: bool): # The torch.nn.Relu6 inherits from torch.nn.Hardtanh, and hence represented as HardTanh in ATen. # Testing the hardtanh originated from torch.nn.Relu6 op. model = Conv2dWithActivation( @@ -43,7 +44,7 @@ def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -69,7 +70,11 @@ def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool): ) @pytest.mark.parametrize("inplace", [True, False]) def test_custom_hardtanh_quant( - mocker, input_shape: tuple[int], activation_range: tuple[int, int], inplace: bool + mocker, + input_shape: tuple[int], + activation_range: tuple[int, int], + inplace: bool, + use_qat: bool, ): # TODO(13063): This test suffers from non-ideal testing random quantization, because we always use range <0,1>. # We should update (decrease atol) when the Conv/Linear + Activation fuse at quantization is in place. 
@@ -82,7 +87,7 @@ def test_custom_hardtanh_quant( converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 8b938ef7fff..569ad571dbc 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -25,6 +25,7 @@ from executorch.backends.xnnpack._passes import RemoveGetItemPass from executorch.exir.verification.verifier import EXIREdgeDialectVerifier from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -103,13 +104,14 @@ def test_max_pool_2d_conversion(input_shape, padding): ), ], ) -def test_max_pool_2d_quant_conversion(mocker, input_shape, padding): +def test_max_pool_2d_quant_conversion(mocker, input_shape, padding, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( MaxPool2dConvModule(padding=padding), input_shape, + use_qat=use_qat, use_neutron_for_format_conversion=False, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py index ee69b1ea352..7c0a5e8ffcf 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py @@ -18,6 +18,7 @@ ToChannelLastPreprocess, ) from executorch.backends.nxp.tests.models import MeanDimConvModule, MeanDimLinearModule +from executorch.backends.nxp.tests.use_qat import * # noqa F403 from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram @@ -47,14 +48,16 @@ def forward(self, x): pytest.param((1, 4, 8, 8), (3, 2), id="Dim 3, 2."), ], ) -def test_mean_dim_conv_quant_conversion(mocker, input_shape, dim, keepdim=True): +def test_mean_dim_conv_quant_conversion( + mocker, input_shape, dim, use_qat, keepdim=True +): model = MeanDimConvModule(dim, keepdim) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion ep = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() # Make sure the `mean.dim` was delegated. 
assert not graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) @@ -93,14 +96,16 @@ def test_mean_dim_conv_quant_conversion(mocker, input_shape, dim, keepdim=True): ], ) def test_mean_dim_linear_unsupported_quant_conversion( - mocker, input_shape, dim, keepdim + mocker, input_shape, dim, use_qat, keepdim ): model = MeanDimLinearModule(dim, keepdim) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(edge_program.graph.nodes) # Last 2 dimensions are not used or keepdim is False, cannot be converted to MeanDim, node is not delegated @@ -138,14 +143,16 @@ def test_mean_dim_linear_unsupported_quant_conversion( pytest.param(True, id="Keep dim."), ], ) -def test_mean_dim_conv_unsupported_quant_conversion(mocker, input_shape, dim, keepdim): +def test_mean_dim_conv_unsupported_quant_conversion( + mocker, input_shape, dim, use_qat, keepdim +): model = MeanDimConvModule(dim, keepdim) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() nodes = list(edge_program.graph.nodes) @@ -178,12 +185,16 @@ def test_mean_dim_conv_unsupported_quant_conversion(mocker, input_shape, dim, ke pytest.param((1, 2, 3, 8), (-2, -3), id="Dim -2, -3."), ], ) -def test_mean_dim__formatless__supported(mocker, input_shape, dim, keepdim=True): +def test_mean_dim__formatless__supported( + mocker, input_shape, dim, use_qat, keepdim=True +): model = MeanDimModule(dim, keepdim) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - ep = to_quantized_edge_program(model, input_shape).exported_program() + ep = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `mean.dim` was delegated. assert not graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) @@ -211,10 +222,12 @@ def test_mean_dim__formatless__supported(mocker, input_shape, dim, keepdim=True) pytest.param((1, 2, 3, 8), (2, 3), id="Dim 2, 3."), ], ) -def test_mean_dim__formatless__unsupported(input_shape, dim, keepdim=True): +def test_mean_dim__formatless__unsupported(input_shape, dim, use_qat, keepdim=True): model = MeanDimModule(dim, keepdim) - ep = to_quantized_edge_program(model, input_shape).exported_program() + ep = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `mean.dim` was NOT delegated. assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) @@ -229,10 +242,14 @@ def test_mean_dim__formatless__unsupported(input_shape, dim, keepdim=True): ), ], ) -def test_mean_dim__formatless__unsupported_channels(input_shape, dim, keepdim=True): +def test_mean_dim__formatless__unsupported_channels( + input_shape, dim, use_qat, keepdim=True +): model = MeanDimModule(dim, keepdim) - ep = to_quantized_edge_program(model, input_shape).exported_program() + ep = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `mean.dim` was NOT delegated. 
assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) @@ -247,13 +264,17 @@ def test_mean_dim__formatless__unsupported_channels(input_shape, dim, keepdim=Tr ), ], ) -def test_mean_dim__channels_first__unsupported_channels(input_shape, dim, keepdim=True): +def test_mean_dim__channels_first__unsupported_channels( + input_shape, dim, use_qat, keepdim=True +): model = MeanDimConvModule( dim, keepdim, out_channels=5 ) # Only multiples of 8 (num_macs) are supported. # Run conversion - ep = to_quantized_edge_program(model, input_shape).exported_program() + ep = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `mean.dim` was NOT delegated. assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py index a2b406cdc76..962a4f4b0c1 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py @@ -19,6 +19,7 @@ ) from executorch.backends.nxp.tests.models import LinearModule, MmModule from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized from torch.export import ExportedProgram @@ -28,7 +29,8 @@ def setUpClass(cls): torch.manual_seed(23) np.random.seed(42) - def test_mm_conversion(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_mm_conversion(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -38,7 +40,7 @@ def test_mm_conversion(self): model = MmModule(input_shape[1]) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. @@ -60,7 +62,8 @@ def test_mm_conversion(self): tfl_model=tflite_flatbuffers_model, ) - def test_linear_conversion__without_bias(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_linear_conversion__without_bias(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -70,7 +73,7 @@ def test_linear_conversion__without_bias(self): model = LinearModule(bias=False) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py index c4fb84dbb60..d32de7241e5 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py @@ -21,6 +21,7 @@ from executorch.exir.dialects._ops import ops as exir_ops from parameterized import parameterized from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 class Conv2dTransposeModule(torch.nn.Module): @@ -112,12 +113,14 @@ def setUpClass(cls): @parameterized.expand( [ - ["To channel first permutation", (1, 16, 8, 8), (0, 3, 1, 2)], - ["To channel last permutation", (1, 16, 8, 8), (0, 2, 3, 1)], + ["QAT; To channel first permutation", (1, 16, 8, 8), (0, 3, 1, 2), True], + ["PTQ; To channel first permutation", (1, 16, 8, 8), (0, 3, 1, 2), False], + ["QAT; To channel last permutation", (1, 16, 8, 8), (0, 2, 3, 1), True], + ["PTQ; To channel last permutation", (1, 16, 8, 8), (0, 2, 3, 1), False], ] ) def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_input( - self, _: str, input_shape, perm + self, _: str, input_shape, perm, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True @@ -126,7 +129,7 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_inp # Run conversion edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure the `Permute_copy` was delegated. @@ -156,12 +159,14 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_inp @parameterized.expand( [ - ["To channel first permutation", (1, 8, 8, 8), (0, 3, 1, 2)], - ["To channel last permutation", (1, 8, 8, 8), (0, 2, 3, 1)], + ["QAT; To channel first permutation", (1, 8, 8, 8), (0, 3, 1, 2), True], + ["PTQ; To channel first permutation", (1, 8, 8, 8), (0, 3, 1, 2), False], + ["QAT; To channel last permutation", (1, 8, 8, 8), (0, 2, 3, 1), True], + ["PTQ; To channel last permutation", (1, 8, 8, 8), (0, 2, 3, 1), False], ] ) def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_output( - self, _: str, input_shape, perm + self, _: str, input_shape, perm, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True @@ -170,7 +175,7 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_out # Run conversion edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure the `Permute_copy` was delegated. @@ -200,14 +205,66 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_out @parameterized.expand( [ - ["nchw->nhwc ... nchw->nhwc", (1, 8, 8, 8), (0, 2, 3, 1), (0, 2, 3, 1)], - ["nchw->nhwc ... nhwc->nchw", (1, 8, 8, 8), (0, 2, 3, 1), (0, 3, 1, 2)], - ["nhwc->nchw ... nhwc->nchw", (1, 8, 8, 8), (0, 3, 1, 2), (0, 3, 1, 2)], - ["nhwc->nchw ... nchw->nhwc", (1, 8, 8, 8), (0, 3, 1, 2), (0, 2, 3, 1)], + [ + "QAT; nchw->nhwc ... nchw->nhwc", + (1, 8, 8, 8), + (0, 2, 3, 1), + (0, 2, 3, 1), + True, + ], + [ + "PTQ; nchw->nhwc ... nchw->nhwc", + (1, 8, 8, 8), + (0, 2, 3, 1), + (0, 2, 3, 1), + False, + ], + [ + "QAT; nchw->nhwc ... nhwc->nchw", + (1, 8, 8, 8), + (0, 2, 3, 1), + (0, 3, 1, 2), + True, + ], + [ + "PTQ; nchw->nhwc ... 
nhwc->nchw", + (1, 8, 8, 8), + (0, 2, 3, 1), + (0, 3, 1, 2), + False, + ], + [ + "QAT; nhwc->nchw ... nhwc->nchw", + (1, 8, 8, 8), + (0, 3, 1, 2), + (0, 3, 1, 2), + True, + ], + [ + "PTQ; nhwc->nchw ... nhwc->nchw", + (1, 8, 8, 8), + (0, 3, 1, 2), + (0, 3, 1, 2), + False, + ], + [ + "QAT; nhwc->nchw ... nchw->nhwc", + (1, 8, 8, 8), + (0, 3, 1, 2), + (0, 2, 3, 1), + True, + ], + [ + "PTQ; nhwc->nchw ... nchw->nhwc", + (1, 8, 8, 8), + (0, 3, 1, 2), + (0, 2, 3, 1), + False, + ], ] ) def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_io( - self, _: str, input_shape, perm1, perm2 + self, _: str, input_shape, perm1, perm2, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True @@ -216,7 +273,7 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_io( # Run conversion edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure the `Permute_copy` was delegated. @@ -246,20 +303,53 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_io( @parameterized.expand( [ - ["Permutation can be replaced by reshapes", (10, 1, 8), (0, 2, 1)], - ["Permutation can be replaced by reshapes", (10, 1, 1), (2, 1, 0)], - ["Permutation is identical and can be removed", (10, 1, 8), (0, 1, 2)], + [ + "QAT; Permutation can be replaced by reshapes", + (10, 1, 8), + (0, 2, 1), + True, + ], + [ + "PTQ; Permutation can be replaced by reshapes", + (10, 1, 8), + (0, 2, 1), + False, + ], + [ + "QAT; Permutation can be replaced by reshapes", + (10, 1, 1), + (2, 1, 0), + True, + ], + [ + "PTQ; Permutation can be replaced by reshapes", + (10, 1, 1), + (2, 1, 0), + False, + ], + [ + "QAT; Permutation is identical and can be removed", + (10, 1, 8), + (0, 1, 2), + True, + ], + [ + "PTQ; Permutation is identical and can be removed", + (10, 1, 8), + (0, 1, 2), + False, + ], ] ) def test_permute_copy_conversion__from_permute_3D__quantized( - self, _: str, input_shape, perm + self, _: str, input_shape, perm, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True ) as converter_spy: # Run conversion edge_program = to_quantized_edge_program( - LinearPermuteModule(input_shape[2], perm), input_shape + LinearPermuteModule(input_shape[2], perm), input_shape, use_qat=use_qat ).exported_program() # Make sure the `Permute_copy` was delegated. 
@@ -289,17 +379,23 @@ def test_permute_copy_conversion__from_permute_3D__quantized( @parameterized.expand( [ - ["Transpose dims 1 and 2", (1, 16, 8, 8), (0, 2, 1, 3)], - ["To (2, 0, 1, 3) permutation", (1, 16, 8, 8), (2, 0, 1, 3)], - ["To (3, 1, 2, 0) permutation", (1, 16, 8, 8), (3, 1, 2, 0)], - ["To (3, 1, 0, 2) permutation", (1, 16, 8, 8), (3, 1, 0, 2)], + ["QAT; Transpose dims 1 and 2", (1, 16, 8, 8), (0, 2, 1, 3), True], + ["PTQ; Transpose dims 1 and 2", (1, 16, 8, 8), (0, 2, 1, 3), False], + ["QAT; To (2, 0, 1, 3) permutation", (1, 16, 8, 8), (2, 0, 1, 3), True], + ["PTQ; To (2, 0, 1, 3) permutation", (1, 16, 8, 8), (2, 0, 1, 3), False], + ["QAT; To (3, 1, 2, 0) permutation", (1, 16, 8, 8), (3, 1, 2, 0), True], + ["PTQ; To (3, 1, 2, 0) permutation", (1, 16, 8, 8), (3, 1, 2, 0), False], + ["QAT; To (3, 1, 0, 2) permutation", (1, 16, 8, 8), (3, 1, 0, 2), True], + ["PTQ; To (3, 1, 0, 2) permutation", (1, 16, 8, 8), (3, 1, 0, 2), False], ] ) def test_permute_copy_non_delegated_conversion__from_permute_4D__quantized( - self, _: str, input_shape, perm + self, _: str, input_shape, perm, use_qat ): model = Conv2dPermuteModule(input_shape[1], perm) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(edge_program.graph.nodes) assert len(nodes) == 8 @@ -309,15 +405,19 @@ def test_permute_copy_non_delegated_conversion__from_permute_4D__quantized( @parameterized.expand( [ - ["Transpose dims 1 and 2", (1, 16, 8, 8), 1, 2], - ["Transpose dims 2 and 3", (1, 16, 8, 8), 2, 3], + ["QAT; Transpose dims 1 and 2", (1, 16, 8, 8), 1, 2, True], + ["PTQ; Transpose dims 1 and 2", (1, 16, 8, 8), 1, 2, False], + ["QAT; Transpose dims 2 and 3", (1, 16, 8, 8), 2, 3, True], + ["PTQ; Transpose dims 2 and 3", (1, 16, 8, 8), 2, 3, False], ] ) def test_permute_copy_non_delegated_conversion__from_transpose_4D__quantized( - self, _: str, input_shape, dim0, dim1 + self, _: str, input_shape, dim0, dim1, use_qat ): model = Conv2dTransposeModule(input_shape[1], dim0, dim1) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(edge_program.graph.nodes) assert len(nodes) == 8 diff --git a/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py index cf0e0135ffe..b91720324f2 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py @@ -21,6 +21,7 @@ ) from executorch.backends.nxp.tests.models import Conv2dModule, LinearModule, ReLUModule from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -62,13 +63,16 @@ def test_relu_conversion(): convert_run_compare(edge_program, input_data=input_data) -def test_relu_with_conv_quant_conversion(mocker): +def test_relu_with_conv_quant_conversion(mocker, use_qat): input_shape = (1, 4, 32, 32) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( - ConvReLUModule(), input_shape, use_neutron_for_format_conversion=False + ConvReLUModule(), + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ) # Capture generated model @@ -90,12 +94,12 @@ def 
test_relu_with_conv_quant_conversion(mocker): ) -def test_relu_with_linear_quant_conversion(mocker): +def test_relu_with_linear_quant_conversion(mocker, use_qat): input_shape = (256, 32) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(LinearReLUModule(), input_shape) + _ = to_quantized_edge_program(LinearReLUModule(), input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, _ = converter_spy.spy_return diff --git a/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py index 382266e9cb1..ad03aa18ded 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py @@ -20,6 +20,7 @@ from executorch.backends.nxp.tests.models import ConvWithSigmoid from torch import nn from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -28,13 +29,13 @@ def reseed_model_per_test_run(): np.random.seed(23) -def test_conv_sigmoid(mocker, input_shape: tuple[int] = (1, 3, 112, 112)): +def test_conv_sigmoid(mocker, use_qat, input_shape: tuple[int] = (1, 3, 112, 112)): model = ConvWithSigmoid(conv_in_channels=input_shape[1]) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -61,12 +62,12 @@ def test_conv_sigmoid(mocker, input_shape: tuple[int] = (1, 3, 112, 112)): pytest.param((10, 3, 25, 25, 25), id="4D"), ], ) -def test_sigmoid_only(mocker, input_shape): +def test_sigmoid_only(mocker, use_qat, input_shape): model = nn.Sigmoid() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - to_quantized_edge_program(model, input_shape).exported_program() + to_quantized_edge_program(model, input_shape, use_qat=use_qat).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return exported_program: ExportedProgram = converter_spy.call_args.args[1] diff --git a/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py index 336c3cc9afd..9ce3e93f39b 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py @@ -22,6 +22,7 @@ ) from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -39,13 +40,13 @@ def reseed_model_per_test_run(): pytest.param((1, 4, 8, 8), id="4D."), ], ) -def test_sub_tensor_quant_conversion(mocker, input_shape): +def test_sub_tensor_quant_conversion(mocker, input_shape, use_qat): model = SubTensorModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(model, [input_shape, input_shape]) + _ = to_quantized_edge_program(model, [input_shape, input_shape], use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -78,13 +79,13 @@ def 
test_sub_tensor_quant_conversion(mocker, input_shape): pytest.param((1, 4, 8, 8), id="4D."), ], ) -def test_sub_tensor_one_input_quant_conversion(mocker, input_shape): +def test_sub_tensor_one_input_quant_conversion(mocker, input_shape, use_qat): model = SubTensorOneInputModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -109,7 +110,7 @@ def test_sub_tensor_one_input_quant_conversion(mocker, input_shape): pytest.param((1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."), ], ) -def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape): +def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape, use_qat): model = SubTensorConvModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") @@ -119,7 +120,10 @@ def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape): # Run conversion _ = to_quantized_edge_program( - model, [x_input_shape, y_input_shape], use_neutron_for_format_conversion=False + model, + [x_input_shape, y_input_shape], + use_qat=use_qat, + use_neutron_for_format_conversion=False, ) # Capture generated model @@ -161,13 +165,13 @@ def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape): ], ) def test_sub_tensor_broadcasting_unsupported_quant_conversion( - x_input_shape, y_input_shape + x_input_shape, y_input_shape, use_qat ): model = SubTensorModule() # Run conversion edge_program = to_quantized_edge_program( - model, [x_input_shape, y_input_shape] + model, [x_input_shape, y_input_shape], use_qat=use_qat ).exported_program() nodes = list(edge_program.graph.nodes) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py index eb5fc6600f5..10892d28e38 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py @@ -34,18 +34,18 @@ def setUpClass(cls): @parameterized.expand( input=[ - ( - "inplace", - True, - ), - ( - "not_inplace", - False, - ), + ("QAT inplace", True, True), + ("PTQ inplace", True, False), + ("QAT not-inplace", False, True), + ("PTQ not-inplace", False, False), ] ) def test_conv_tanh( - self, _: str, inplace: bool, input_shape: tuple[int] = (1, 3, 112, 112) + self, + _: str, + inplace: bool, + use_qat: bool, + input_shape: tuple[int] = (1, 3, 112, 112), ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -62,7 +62,10 @@ def test_conv_tanh( ) quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value exported_program: ExportedProgram = converter_spy.calls[-1].args[0] diff --git a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py index 6e3da6c91cd..ce9fecb049b 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py @@ -38,6 +38,7 @@ from executorch.exir.dialects._ops import ops as exir_ops from 
torch import nn from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -243,11 +244,13 @@ def test__view_copy__formatless_to_formatless(mocker): pytest.param((8, 64), (1, 16, 4, 4), id="2D"), ], ) -def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape): +def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(LinearReshapeModule(new_shape=new_shape), input_shape) + _ = to_quantized_edge_program( + LinearReshapeModule(new_shape=new_shape), input_shape, use_qat=use_qat + ) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -268,7 +271,9 @@ def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape): pytest.param((1, 4, 16, 16), 196, id="4D"), ], ) -def test_view_w_conv_linear_quant_conversion(mocker, input_shape, channels_view_out): +def test_view_w_conv_linear_quant_conversion( + mocker, input_shape, channels_view_out, use_qat +): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion @@ -277,6 +282,7 @@ def test_view_w_conv_linear_quant_conversion(mocker, input_shape, channels_view_ channels=input_shape[1], channels_view_out=channels_view_out ), input_shape, + use_qat=use_qat, use_neutron_for_format_conversion=False, ) diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py index edba1af4ede..e2b41aab8de 100644 --- a/backends/nxp/tests/models.py +++ b/backends/nxp/tests/models.py @@ -631,3 +631,42 @@ def __init__(self, activation: str, inplace: bool, in_channels: int): def forward(self, x): x = self.conv(x) return self.activation(x) + + +class MiniConvNetWithRegressionHead(torch.nn.Module): + def __init__(self): + super().__init__() + + self.conv1 = Conv2dModule(in_channels=3, out_channels=16, stride=1, padding=1) + self.relu = torch.nn.ReLU() + self.pool = torch.nn.MaxPool2d(2, 2) + self.conv2 = Conv2dModule(in_channels=16, out_channels=32, stride=1, padding=1) + self.relu2 = torch.nn.ReLU() + self.pool = torch.nn.MaxPool2d(2, 2) + self.linear = torch.nn.Linear(32 * 8 * 8, 1) + + def forward(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.pool(x) + x = self.conv2(x) + x = self.relu2(x) + x = self.pool(x) + x = x.flatten() + x = self.linear(x) + return x + + +class MLP(torch.nn.Module): + def __init__(self): + super().__init__() + self.sequential = torch.nn.Sequential( + torch.nn.Linear(1, 10), + torch.nn.ReLU(), + torch.nn.Linear(10, 10), + torch.nn.ReLU(), + torch.nn.Linear(10, 1), + ) + + def forward(self, x): + return self.sequential(x) diff --git a/backends/nxp/tests/test_edge_passes.py b/backends/nxp/tests/test_edge_passes.py index bde3d22e204..d93b1ae69ff 100644 --- a/backends/nxp/tests/test_edge_passes.py +++ b/backends/nxp/tests/test_edge_passes.py @@ -29,7 +29,7 @@ from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.backends.nxp.tests.executorch_pipeline import ( get_random_calibration_inputs, neutron_target_spec, @@ -87,6 +87,8 @@ def 
_assert_nodes_form_a_view_copy_qdq_cluster(graph: Graph, node_indices: list[ class TestEdgePasses(unittest.TestCase): + __test__ = False # Prevent interfering with PyTest tests + @classmethod def setUpClass(cls): torch.manual_seed(23) @@ -305,7 +307,7 @@ def test_remove_additional_quantize_dequantize_nodes_pass(self): example_input = calibration_inputs[0] exir_program_aten = torch.export.export(model, example_input, strict=True) - exir_program_aten_quant = post_training_quantize( + exir_program_aten_quant = calibrate_and_quantize( exir_program_aten, calibration_inputs, NeutronQuantizer(neutron_target_spec), diff --git a/backends/nxp/tests/test_integration.py b/backends/nxp/tests/test_integration.py index 3bd5f3e1487..fe157b44c48 100644 --- a/backends/nxp/tests/test_integration.py +++ b/backends/nxp/tests/test_integration.py @@ -5,6 +5,7 @@ import executorch.extension.pybindings.portable_lib import executorch.kernels.quantized # noqa F401 +from executorch.backends.nxp.tests.use_qat import * # noqa F401 from executorch.backends.nxp.tests.executorch_pipeline import ( to_quantized_executorch_program, @@ -14,11 +15,11 @@ from executorch.examples.nxp.experimental.cifar_net.cifar_net import CifarNet -def test_conv_fc_softmax__to_executorch_program(): +def test_conv_fc_softmax__to_executorch_program(use_qat): model = ConvFCSoftmaxModule() input_shape = (1, 4, 5, 5) - exec_prog = to_quantized_executorch_program(model, input_shape) + exec_prog = to_quantized_executorch_program(model, input_shape, use_qat) program = exec_prog.exported_program() assert ( @@ -36,11 +37,11 @@ def test_conv_fc_softmax__to_executorch_program(): assert "addmm" not in node.name -def test_cifarnet(): +def test_cifarnet(use_qat): model = CifarNet().get_eager_model().eval() input_shape = (1, 3, 32, 32) exec_prog = to_quantized_executorch_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) delegation_info = get_delegation_info(exec_prog.exported_program().graph_module) diff --git a/backends/nxp/tests/test_move_activation_before_concatenation.py b/backends/nxp/tests/test_move_activation_before_concatenation.py index cede3e41994..27bd675a487 100644 --- a/backends/nxp/tests/test_move_activation_before_concatenation.py +++ b/backends/nxp/tests/test_move_activation_before_concatenation.py @@ -3,6 +3,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import itertools
 import math
 import unittest
 
@@ -19,7 +20,7 @@
     EdgeProgramToIRConverter,
 )
 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
-from executorch.backends.nxp.quantizer.utils import post_training_quantize
+from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize
 from executorch.backends.nxp.tests.executorch_pipeline import (
     get_random_calibration_inputs,
     neutron_target_spec,
@@ -50,6 +51,35 @@
 ]
 
 
+# Permutation of all supported combinations of:
+# <activation>, <inplace>, <is_qat>
+all_activation_cases = list(
+    itertools.product(
+        ["relu", "relu6", "tanh"],
+        [True, False],
+        [True, False],
+    )
+) + [
+    ("sigmoid", False, True),
+    ("sigmoid", False, False),
+]
+
+
+# <activation_1>, <activation_2>, <act1_inplace>, <act2_inplace>, <use_qat>
+all_concat_cluster_cases = [
+    ("relu", "relu", True, False, True),
+    ("relu", "relu", True, False, False),
+    ("relu6", "relu6", False, True, True),
+    ("relu6", "relu6", False, True, False),
+    ("tanh", "tanh", True, False, True),
+    ("tanh", "tanh", True, False, False),
+    ("sigmoid", "sigmoid", False, True, True),
+    ("sigmoid", "sigmoid", False, True, False),
+    ("relu", "relu_hardtanh", True, True, True),
+    ("relu", "relu_hardtanh", True, True, False),
+]
+
+
 class ConvConcatActivationModule(torch.nn.Module):
     def __init__(self, activation: str, inplace: bool, in_channels: int):
         super().__init__()
@@ -174,18 +204,8 @@ def setUpClass(cls):
         torch.manual_seed(23)
         np.random.seed(42)
 
-    @parameterized.expand(
-        [
-            ["relu", True],
-            ["relu", False],
-            ["relu6", True],
-            ["relu6", False],
-            ["tanh", True],
-            ["tanh", False],
-            ["sigmoid", False],
-        ]
-    )
-    def test_move_activation_before_concat__conv(self, activation, inplace):
+    @parameterized.expand(all_activation_cases)
+    def test_move_activation_before_concat__conv(self, activation, inplace, is_qat):
         input_shape = (1, 3, 8, 8)
         model = ConvConcatActivationModule(
             activation=activation, inplace=inplace, in_channels=3
@@ -248,10 +268,11 @@ def test_move_activation_before_concat__conv(self, activation, inplace):
         neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec)
         neutron_aten_pass_manager(exir_program_aten)  # All passes by default.
 
-        exir_program_aten_quant = post_training_quantize(
+        exir_program_aten_quant = calibrate_and_quantize(
             exir_program_aten,
             calibration_inputs,
             NeutronQuantizer(neutron_target_spec),
+            is_qat=is_qat,
         )
 
         # Check convolution and activation are in same QDQ cluster.
@@ -282,18 +303,8 @@ def test_move_activation_before_concat__conv(self, activation, inplace):
             == torch.ops.quantized_decomposed.quantize_per_tensor.default
         )
 
-    @parameterized.expand(
-        [
-            ["relu", True],
-            ["relu", False],
-            ["relu6", True],
-            ["relu6", False],
-            ["tanh", True],
-            ["tanh", False],
-            ["sigmoid", False],
-        ]
-    )
-    def test_move_activation_before_concat__linear(self, activation, inplace):
+    @parameterized.expand(all_activation_cases)
+    def test_move_activation_before_concat__linear(self, activation, inplace, is_qat):
         input_shape = (1, 8)
         model = LinearConcatActivationModule(
             activation=activation, inplace=inplace, in_channels=8, mode="linear"
@@ -356,10 +367,11 @@ def test_move_activation_before_concat__linear(self, activation, inplace):
         neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec)
         neutron_aten_pass_manager(exir_program_aten)  # All passes by default.
 
- exir_program_aten_quant = post_training_quantize( + exir_program_aten_quant = calibrate_and_quantize( exir_program_aten, calibration_inputs, NeutronQuantizer(neutron_target_spec), + is_qat=is_qat, ) # Check linear and activation are in same QDQ cluster. @@ -390,18 +402,8 @@ def test_move_activation_before_concat__linear(self, activation, inplace): == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) - def test_move_activation_before_concat__addmm(self, activation, inplace): + @parameterized.expand(all_activation_cases) + def test_move_activation_before_concat__addmm(self, activation, inplace, is_qat): input_shape = (1, 8) model = LinearConcatActivationModule( activation=activation, inplace=inplace, in_channels=8, mode="addmm" @@ -464,10 +466,11 @@ def test_move_activation_before_concat__addmm(self, activation, inplace): neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec) neutron_aten_pass_manager(exir_program_aten) # All passes by default. - exir_program_aten_quant = post_training_quantize( + exir_program_aten_quant = calibrate_and_quantize( exir_program_aten, calibration_inputs, NeutronQuantizer(neutron_target_spec), + is_qat=is_qat, ) # Check addmm and activation are in same QDQ cluster. @@ -498,18 +501,8 @@ def test_move_activation_before_concat__addmm(self, activation, inplace): == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) - def test_move_activation_before_concat__mm(self, activation, inplace): + @parameterized.expand(all_activation_cases) + def test_move_activation_before_concat__mm(self, activation, inplace, is_qat): input_shape = (1, 8) model = LinearConcatActivationModule( activation=activation, inplace=inplace, in_channels=8, mode="mm" @@ -572,10 +565,11 @@ def test_move_activation_before_concat__mm(self, activation, inplace): neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec) neutron_aten_pass_manager(exir_program_aten) # All passes by default. - exir_program_aten_quant = post_training_quantize( + exir_program_aten_quant = calibrate_and_quantize( exir_program_aten, calibration_inputs, NeutronQuantizer(neutron_target_spec), + is_qat=is_qat, ) # Check mm and activation are in same QDQ cluster. @@ -606,19 +600,9 @@ def test_move_activation_before_concat__mm(self, activation, inplace): == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) + @parameterized.expand(all_activation_cases) def test_move_activation_before_concat_quantization__conv( - self, activation, inplace + self, activation, inplace, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -631,7 +615,10 @@ def test_move_activation_before_concat_quantization__conv( ) edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ).exported_program() # Make sure that all nodes were delegated. 
@@ -655,19 +642,9 @@ def test_move_activation_before_concat_quantization__conv( tflite_output_preprocess=ToChannelFirstPreprocess(), ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) + @parameterized.expand(all_activation_cases) def test_move_activation_before_concat_quantization__linear( - self, activation, inplace + self, activation, inplace, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -680,7 +657,7 @@ def test_move_activation_before_concat_quantization__linear( ) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. @@ -702,19 +679,9 @@ def test_move_activation_before_concat_quantization__linear( tfl_model=tflite_flatbuffers_model, ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) + @parameterized.expand(all_activation_cases) def test_move_activation_before_concat_quantization__addmm( - self, activation, inplace + self, activation, inplace, use_qat ): torch.manual_seed(23) with kgb.spy_on( @@ -728,7 +695,7 @@ def test_move_activation_before_concat_quantization__addmm( ) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. @@ -751,18 +718,10 @@ def test_move_activation_before_concat_quantization__addmm( atol=1.0, ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) - def test_move_activation_before_concat_quantization__mm(self, activation, inplace): + @parameterized.expand(all_activation_cases) + def test_move_activation_before_concat_quantization__mm( + self, activation, inplace, use_qat + ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -774,7 +733,7 @@ def test_move_activation_before_concat_quantization__mm(self, activation, inplac ) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. 
@@ -796,17 +755,9 @@ def test_move_activation_before_concat_quantization__mm(self, activation, inplac tfl_model=tflite_flatbuffers_model, ) - @parameterized.expand( - [ - ["relu", "relu", True, False], - ["relu6", "relu6", False, True], - ["tanh", "tanh", True, False], - ["sigmoid", "sigmoid", False, True], - ["relu", "relu_hardtanh", True, True], - ] - ) + @parameterized.expand(all_concat_cluster_cases) def test_concat_cluster_quantization__conv( - self, activation1, activation2, act1_inplace, act2_inplace + self, activation1, activation2, act1_inplace, act2_inplace, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -814,7 +765,7 @@ def test_concat_cluster_quantization__conv( owner=EdgeProgramToIRConverter, ) as converter_spy: with kgb.spy_on( - post_training_quantize, call_original=True + calibrate_and_quantize, call_original=True ) as quantizer_spy: input_shape = (1, 8, 8, 8) model = ConvActivationConcatModule( @@ -822,7 +773,10 @@ def test_concat_cluster_quantization__conv( ) edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ).exported_program() # Make sure that all nodes were delegated. @@ -877,17 +831,9 @@ def test_concat_cluster_quantization__conv( tflite_output_preprocess=ToChannelFirstPreprocess(), ) - @parameterized.expand( - [ - ["relu", "relu", True, False], - ["relu6", "relu6", False, True], - ["tanh", "tanh", True, False], - ["sigmoid", "sigmoid", False, True], - ["relu", "relu_hardtanh", True, True], - ] - ) + @parameterized.expand(all_concat_cluster_cases) def test_concat_cluster_quantization__linear( - self, activation1, activation2, act1_inplace, act2_inplace + self, activation1, activation2, act1_inplace, act2_inplace, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -895,7 +841,7 @@ def test_concat_cluster_quantization__linear( owner=EdgeProgramToIRConverter, ) as converter_spy: with kgb.spy_on( - post_training_quantize, call_original=True + calibrate_and_quantize, call_original=True ) as quantizer_spy: input_shape = (1, 8) model = LinearActivationConcatModule( @@ -903,7 +849,7 @@ def test_concat_cluster_quantization__linear( ) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. 
diff --git a/backends/nxp/tests/test_per_channel_conversion.py b/backends/nxp/tests/test_per_channel_conversion.py index 62cbef9e151..b3034ff17ed 100644 --- a/backends/nxp/tests/test_per_channel_conversion.py +++ b/backends/nxp/tests/test_per_channel_conversion.py @@ -31,11 +31,18 @@ ) from executorch.backends.nxp.tests.models import Conv2dModule from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized from torch import fx from torch._ops import OpOverload from torch.export import ExportedProgram -from torchao.quantization.pt2e import MinMaxObserver, PerChannelMinMaxObserver +from torchao.quantization.pt2e import ( + FusedMovingAvgObsFakeQuantize, + MinMaxObserver, + MovingAverageMinMaxObserver, + MovingAveragePerChannelMinMaxObserver, + PerChannelMinMaxObserver, +) from torchao.quantization.pt2e.quantizer import ( DerivedQuantizationSpec, QuantizationConfig, @@ -45,8 +52,8 @@ class Conv2dPatternPerChannel(QuantizationPattern): - def __init__(self, is_per_channel: bool): - super().__init__() + def __init__(self, is_per_channel: bool, is_qat: bool): + super().__init__(is_qat=is_qat) self.is_per_channel = is_per_channel def partition_types(self) -> list[OpOverload]: @@ -80,9 +87,20 @@ def get_anchors( if self.is_per_channel else torch.per_tensor_symmetric ) - weight_observer_or_fake_quant_ctr = ( - PerChannelMinMaxObserver if self.is_per_channel else MinMaxObserver - ) + if self.is_qat: + observer = ( + MovingAveragePerChannelMinMaxObserver + if self.is_per_channel + else MovingAverageMinMaxObserver + ) + weight_observer_or_fake_quant_ctr = FusedMovingAvgObsFakeQuantize.with_args( + observer=observer + ) + else: + weight_observer_or_fake_quant_ctr = ( + PerChannelMinMaxObserver if self.is_per_channel else MinMaxObserver + ) + weight_quantization_spec = QuantizationSpec( dtype=torch.int8, observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, @@ -108,7 +126,8 @@ def setUpClass(cls): torch.manual_seed(25) np.random.seed(25) - def test_per_channel_convolution(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_per_channel_convolution(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -119,13 +138,18 @@ def test_per_channel_convolution(self): ) input_shape = (1, 8, 32, 32) - static_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_qspec, None) + activation_qspec = act_qspec(is_qat=use_qat) + static_qconfig = QuantizationConfig( + activation_qspec, activation_qspec, wgt_qspec, None + ) _ = to_quantized_edge_program( model, input_shape, get_quantizer_fn=lambda: NeutronAtenQuantizer( - Conv2dPatternPerChannel(is_per_channel=True), static_qconfig + Conv2dPatternPerChannel(is_per_channel=True, is_qat=use_qat), + static_qconfig, ), + use_qat=use_qat, use_neutron_for_format_conversion=False, ) diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py index 85736039d26..27422f9ce1e 100644 --- a/backends/nxp/tests/test_quantizer.py +++ b/backends/nxp/tests/test_quantizer.py @@ -5,6 +5,7 @@ # Tests for NeutronQuantizer. 
+import itertools from copy import deepcopy import executorch.backends.nxp.tests.executorch_pipeline as executorch_pipeline @@ -29,9 +30,17 @@ ToChannelLastPreprocess, ) from executorch.exir.dialects._ops import ops as exir_ops -from torch.export import ExportedProgram +from torch.export import export, ExportedProgram from torch.fx import GraphModule -from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e +from torchao.quantization.pt2e import ( + move_exported_model_to_eval, + move_exported_model_to_train, +) +from torchao.quantization.pt2e.quantize_pt2e import ( + convert_pt2e, + prepare_pt2e, + prepare_qat_pt2e, +) fuse_activation_ops = [ exir_ops.edge.aten.addmm.default, @@ -44,16 +53,45 @@ ] +# Permutation of all supported combinations of: +# , , +all_activation_cases = list( + itertools.product( + ["relu", "relu6", "tanh"], + [True, False], + [True, False], + ) +) + [ + ("sigmoid", False, True), + ("sigmoid", False, False), +] + + +@pytest.fixture(autouse=True) +def reseed_model_per_test_run(): + torch.manual_seed(23) + + +def _prepare_for_quantization(exported_model, is_qat: bool = False): + if is_qat: + return prepare_qat_pt2e( + exported_model.module(), NeutronQuantizer(neutron_target_spec, is_qat=True) + ) + else: + return prepare_pt2e( + exported_model.module(), NeutronQuantizer(neutron_target_spec) + ) + + def test_quantizer_conv2d(): model = models.Conv2dModule() model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -87,11 +125,10 @@ def test_quantizer_linear(): model.eval() example_input = (torch.ones(10, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -123,11 +160,10 @@ def test_quantizer_maxpool2d(): model.eval() example_input = (torch.ones(1, 8, 32, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -158,11 +194,10 @@ def test_quantizer_softmax(): model.eval() example_input = (torch.ones(1, 10),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -192,11 +227,10 @@ def test_quantizer_single_maxpool2d(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, 
example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -214,11 +248,10 @@ def test_quantizer_conv2d_relu(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -241,11 +274,10 @@ def test_quantizer_conv2d_avg_pool2d(): model.eval() example_input = (torch.ones(1, 4, 16, 16),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -269,11 +301,10 @@ def test_quantizer_conv2d_permute(): model.eval() example_input = (torch.ones(1, 4, 16, 16),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -301,11 +332,10 @@ def test_multiple_shared_spec_ops_in_row(): model.eval() example_input = (torch.ones(1, 3, 64, 64),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -362,21 +392,10 @@ def test_quantizers_order_invariance(): assert all(n == n_reversed for n, n_reversed in zip(nodes, nodes_reversed)) -@pytest.mark.parametrize( - "activation, inplace", - [ - ("relu", True), - ("relu", False), - ("relu6", True), - ("relu6", False), - ("tanh", True), - ("tanh", False), - ("sigmoid", False), - ], -) -def test_quantizer__linear_w_activation(mocker, activation, inplace): +@pytest.mark.parametrize("activation, inplace, use_qat", all_activation_cases) +def test_quantizer__linear_w_activation(mocker, activation, inplace, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantizer_spy = mocker.spy(executorch_pipeline, "post_training_quantize") + quantizer_spy = mocker.spy(executorch_pipeline, "calibrate_and_quantize") input_shape = (1, 4) model = models.LinearActivationModule( @@ -386,7 +405,9 @@ def test_quantizer__linear_w_activation(mocker, activation, inplace): mode="linear", ) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure that all nodes were delegated. 
assert not graph_contains_any_of_ops( @@ -418,28 +439,19 @@ def test_quantizer__linear_w_activation(mocker, activation, inplace): ) -@pytest.mark.parametrize( - "activation, inplace", - [ - ("relu", True), - ("relu", False), - ("relu6", True), - ("relu6", False), - ("tanh", True), - ("tanh", False), - ("sigmoid", False), - ], -) -def test_quantizer__addmm_w_activation(mocker, activation, inplace): +@pytest.mark.parametrize("activation, inplace, use_qat", all_activation_cases) +def test_quantizer__addmm_w_activation(mocker, activation, inplace, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantizer_spy = mocker.spy(executorch_pipeline, "post_training_quantize") + quantizer_spy = mocker.spy(executorch_pipeline, "calibrate_and_quantize") input_shape = (1, 4) model = models.LinearActivationModule( activation=activation, inplace=inplace, in_channels=input_shape[1], mode="addmm" ) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure that all nodes were delegated. assert not graph_contains_any_of_ops( @@ -471,28 +483,19 @@ def test_quantizer__addmm_w_activation(mocker, activation, inplace): ) -@pytest.mark.parametrize( - "activation, inplace", - [ - ("relu", True), - ("relu", False), - ("relu6", True), - ("relu6", False), - ("tanh", True), - ("tanh", False), - ("sigmoid", False), - ], -) -def test_quantizer__mm_w_activation(mocker, activation, inplace): +@pytest.mark.parametrize("activation, inplace, use_qat", all_activation_cases) +def test_quantizer__mm_w_activation(mocker, activation, inplace, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantizer_spy = mocker.spy(executorch_pipeline, "post_training_quantize") + quantizer_spy = mocker.spy(executorch_pipeline, "calibrate_and_quantize") input_shape = (1, 4) model = models.LinearActivationModule( activation=activation, inplace=inplace, in_channels=input_shape[1], mode="mm" ) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure that all nodes were delegated. assert not graph_contains_any_of_ops( @@ -524,28 +527,19 @@ def test_quantizer__mm_w_activation(mocker, activation, inplace): ) -@pytest.mark.parametrize( - "activation, inplace", - [ - ("relu", True), - ("relu", False), - ("relu6", True), - ("relu6", False), - ("tanh", True), - ("tanh", False), - ("sigmoid", False), - ], -) -def test_quantizer__conv_w_activation(mocker, activation, inplace): +@pytest.mark.parametrize("activation, inplace, use_qat", all_activation_cases) +def test_quantizer__conv_w_activation(mocker, activation, inplace, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantizer_spy = mocker.spy(executorch_pipeline, "post_training_quantize") + quantizer_spy = mocker.spy(executorch_pipeline, "calibrate_and_quantize") input_shape = (1, 4, 8, 8) model = models.ConvActivationModule( activation=activation, inplace=inplace, in_channels=input_shape[1] ) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure that all nodes were delegated. 
assert not graph_contains_any_of_ops( @@ -579,3 +573,66 @@ def test_quantizer__conv_w_activation(mocker, activation, inplace): tflite_output_preprocess=ToChannelFirstPreprocess(), atol=1.0, ) + + +def test_qat_train(loss_tolerance: float = 0.02): + def evaluate(model, inputs, gts): + with torch.no_grad(): + test_outputs = model(inputs) + loss = torch.nn.functional.mse_loss(test_outputs, gts) + return loss + + def train_step(model, optimizer): + optimizer.zero_grad() + batch = torch.randn(100, 1).clamp(-1, 1) + outputs = model(batch) + loss = torch.nn.functional.mse_loss(outputs, torch.sin(batch)) + loss.backward() + optimizer.step() + + model = models.MLP() + model.train() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + for _ in range(100): + train_step(model, optimizer) + + test_inputs = torch.randn(20, 1).clamp(-1, 1) + + model.eval() + eval_loss = evaluate(model, test_inputs, torch.sin(test_inputs)) + + exported_model = export(model, (torch.randn(1, 1),), strict=True) + prepared_model = _prepare_for_quantization(exported_model, is_qat=True) + + prepared_model = move_exported_model_to_train(prepared_model) + for _ in range(30): + train_step(prepared_model, optimizer) + prepared_model = move_exported_model_to_eval(prepared_model) + + quantized_model = convert_pt2e(prepared_model) + + test_inputs = torch.randn(100, 1).clamp(-1, 1) + + quant_eval_loss = evaluate(quantized_model, test_inputs, torch.sin(test_inputs)) + + assert (quant_eval_loss - eval_loss) < loss_tolerance + + +def test_qat_produces_same_graph_as_ptq(): + model = models.MiniConvNetWithRegressionHead() + model.eval() + exported_model = export(model, ((torch.randn(1, 3, 32, 32),)), strict=True) + + qat_prepared_model = _prepare_for_quantization(exported_model, is_qat=True) + qat_quantized_model = convert_pt2e(qat_prepared_model) + + ptq_prepared_model = _prepare_for_quantization(exported_model, is_qat=False) + ptq_quantized_model = convert_pt2e(ptq_prepared_model) + + assert all( + ptqn.target == qatn.target + for qatn, ptqn in zip( + qat_quantized_model.graph.nodes, ptq_quantized_model.graph.nodes + ) + ) diff --git a/backends/nxp/tests/test_removing_dead_code.py b/backends/nxp/tests/test_removing_dead_code.py index 18d2f1d698e..8b3a979f412 100644 --- a/backends/nxp/tests/test_removing_dead_code.py +++ b/backends/nxp/tests/test_removing_dead_code.py @@ -10,9 +10,10 @@ import torch from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.backends.nxp.tests.executorch_pipeline import neutron_target_spec from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops +from parameterized import parameterized @pytest.fixture(autouse=True) @@ -39,7 +40,8 @@ def setUpClass(cls): torch.manual_seed(23) np.random.seed(23) - def test_removing_dead_code(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_removing_dead_code(self, _, is_qat: bool): input_shape = (42,) example_inputs = (torch.ones(input_shape),) model = DeadCodeModule() @@ -53,8 +55,8 @@ def test_removing_dead_code(self): # The `NeutronQuantizer` should remove the dead code in the `transform_for_annotation()` method. 
quantizer = NeutronQuantizer(neutron_target_spec) - exir_program_aten_quant = post_training_quantize( - exir_program_aten, [example_inputs], quantizer + exir_program_aten_quant = calibrate_and_quantize( + exir_program_aten, [example_inputs], quantizer, is_qat=is_qat ) # Make sure the is no `add` operation in the graph anymore. diff --git a/backends/nxp/tests/test_split_group_convolution.py b/backends/nxp/tests/test_split_group_convolution.py index f5dfcff1fde..e8d807963ee 100644 --- a/backends/nxp/tests/test_split_group_convolution.py +++ b/backends/nxp/tests/test_split_group_convolution.py @@ -18,7 +18,7 @@ from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.backends.nxp.tests.executorch_pipeline import ( get_random_calibration_inputs, neutron_target_spec, @@ -38,14 +38,15 @@ def _quantize_and_lower_module( - module: GraphModule, input_shape: tuple[int, ...], target="imxrt700" + module: GraphModule, input_shape: tuple[int, ...], is_qat: bool, target="imxrt700" ) -> EdgeProgramManager: calibration_inputs = get_random_calibration_inputs(to_model_input_spec(input_shape)) - exir_program_aten__module_quant = post_training_quantize( + exir_program_aten__module_quant = calibrate_and_quantize( module, calibration_inputs, NeutronQuantizer(neutron_target_spec), + is_qat=is_qat, ) edge_compile_config = EdgeCompileConfig(_check_ir_validity=False) @@ -70,12 +71,17 @@ def setUp(cls): @parameterized.expand( [ - ["group = 2", [1, 16, 10, 10], 2], - ["group = 3", [1, 24, 10, 10], 3], - ["group = 8", [1, 8, 10, 10], 8], + ["QAT; group = 2", [1, 16, 10, 10], 2, True], + ["PTQ; group = 2", [1, 16, 10, 10], 2, False], + ["QAT; group = 3", [1, 24, 10, 10], 3, True], + ["PTQ; group = 3", [1, 24, 10, 10], 3, False], + ["QAT; group = 8", [1, 8, 10, 10], 8, True], + ["PTQ; group = 8", [1, 8, 10, 10], 8, False], ] ) - def test_split_group_convolution__2d(self, _, input_shape: list[int], group: int): + def test_split_group_convolution__2d( + self, _, input_shape: list[int], group: int, is_qat: bool + ): example_input = (torch.ones(input_shape),) module = Conv2dModule( @@ -116,7 +122,7 @@ def test_split_group_convolution__2d(self, _, input_shape: list[int], group: int # Make sure the graph can be correctly quantized and lowered to edge. 
ep = _quantize_and_lower_module( - modified_module, tuple(input_shape) + modified_module, tuple(input_shape), is_qat=is_qat ).exported_program() nodes = list(ep.graph.nodes) assert nodes[-5].name == "lowered_module_0" @@ -127,12 +133,17 @@ def test_split_group_convolution__2d(self, _, input_shape: list[int], group: int @parameterized.expand( [ - ["group = 2", [1, 16, 10], 2], - ["group = 3", [1, 24, 10], 3], - ["group = 6", [1, 24, 10], 6], + ["QAT; group = 2", [1, 16, 10], 2, True], + ["PTQ; group = 2", [1, 16, 10], 2, False], + ["QAT; group = 3", [1, 24, 10], 3, True], + ["PTQ; group = 3", [1, 24, 10], 3, False], + ["QAT; group = 6", [1, 24, 10], 6, True], + ["PTQ; group = 6", [1, 24, 10], 6, False], ] ) - def test_split_group_convolution__1d(self, _, input_shape: list[int], group: int): + def test_split_group_convolution__1d( + self, _, input_shape: list[int], group: int, is_qat: bool + ): example_input = (torch.ones(input_shape),) module = Conv1dModule( @@ -173,7 +184,7 @@ def test_split_group_convolution__1d(self, _, input_shape: list[int], group: int # Make sure the graph can be correctly quantized and lowered to edge. ep = _quantize_and_lower_module( - modified_module, tuple(input_shape) + modified_module, tuple(input_shape), is_qat=is_qat ).exported_program() nodes = list(ep.graph.nodes) assert nodes[-5].name == "lowered_module_0" @@ -219,7 +230,8 @@ def test_split_group_convolution__3d(self, _, input_shape: list[int], group: int out2 = modified_module(input_data).detach().numpy() assert np.allclose(out1, out2) - def test_split_group_convolution__applied_by_default(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_split_group_convolution__applied_by_default(self, _, is_qat: bool): input_shape = [1, 16, 10, 10] group = 2 example_input = (torch.ones(input_shape),) @@ -261,7 +273,7 @@ def test_split_group_convolution__applied_by_default(self): # Make sure the graph can be correctly quantized and lowered to edge. ep = _quantize_and_lower_module( - modified_module, tuple(input_shape) + modified_module, tuple(input_shape), is_qat=is_qat ).exported_program() nodes = list(ep.graph.nodes) assert nodes[-5].name == "lowered_module_0" diff --git a/backends/nxp/tests/use_qat.py b/backends/nxp/tests/use_qat.py new file mode 100644 index 00000000000..5994d5aa193 --- /dev/null +++ b/backends/nxp/tests/use_qat.py @@ -0,0 +1,11 @@ +import pytest + + +@pytest.fixture +def use_qat(request): + return request.param + + +def pytest_generate_tests(metafunc): + if "use_qat" in metafunc.fixturenames: + metafunc.parametrize("use_qat", [True, False], indirect=True) diff --git a/docs/source/backends-nxp.md b/docs/source/backends-nxp.md index 20dd180fb31..4f7e2e9c763 100644 --- a/docs/source/backends-nxp.md +++ b/docs/source/backends-nxp.md @@ -81,12 +81,12 @@ Or you can use the predefined function for post training quantization from NXP b ```python from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize ... 
target_spec = NeutronTargetSpec(target="imxrt700", converter_flavor="SDK_25_09") -quantized_graph_module = post_training_quantize( +quantized_graph_module = calibrate_and_quantize( aten_model, calibration_inputs, NeutronQuantizer(neutron_target_spec=target_spec), diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py index d2f539f0de8..175dc9d8d70 100644 --- a/examples/nxp/aot_neutron_compile.py +++ b/examples/nxp/aot_neutron_compile.py @@ -27,7 +27,7 @@ from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.devtools.visualization.visualization_utils import ( visualize_with_clusters, ) @@ -219,7 +219,7 @@ def get_model_and_inputs_from_name(model_name: str): ) calibration_inputs = example_inputs quantizer = NeutronQuantizer(neutron_target_spec) - module = post_training_quantize(module, calibration_inputs, quantizer) + module = calibrate_and_quantize(module, calibration_inputs, quantizer) if args.so_library is not None: logging.debug(f"Loading libraries: {args.so_library}")
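The example driver above still calls `calibrate_and_quantize` in its PTQ form; the QAT flow goes through the same entry point. Below is a minimal sketch, not part of the diff above, assuming the helpers this patch adds or exercises in its tests (`MLP` from `backends/nxp/tests/models.py`, `neutron_target_spec` from `backends/nxp/tests/executorch_pipeline.py`):

```python
import torch

from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize
from executorch.backends.nxp.tests.executorch_pipeline import neutron_target_spec
from executorch.backends.nxp.tests.models import MLP

model = MLP().eval()
example_inputs = (torch.randn(1, 1),)

# Export the eager model to an aten-level exported program.
exir_program_aten = torch.export.export(model, example_inputs, strict=True)

# Same call pattern the QAT-parametrized tests in this patch use: the exported
# program, a list of calibration inputs, and the quantizer; is_qat=True selects
# the QAT fake-quantize observers instead of the PTQ min-max/histogram ones.
exir_program_aten_quant = calibrate_and_quantize(
    exir_program_aten,
    [example_inputs],
    NeutronQuantizer(neutron_target_spec, is_qat=True),
    is_qat=True,
)
```

For a full QAT loop that fine-tunes between preparation and conversion (via `prepare_qat_pt2e`, `move_exported_model_to_train`/`move_exported_model_to_eval`, and `convert_pt2e`), see `test_qat_train` in `backends/nxp/tests/test_quantizer.py` above.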