diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index e5e83d3e255..b9186884d5e 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -54,7 +54,13 @@ ) from torch import fx from torch.ao.quantization.quantizer.utils import _annotate_output_qspec -from torchao.quantization.pt2e import HistogramObserver, MinMaxObserver +from torchao.quantization.pt2e import ( + FakeQuantize, + FusedMovingAvgObsFakeQuantize, + HistogramObserver, + MinMaxObserver, + MovingAverageMinMaxObserver, +) from torchao.quantization.pt2e.quantizer import ( ComposableQuantizer, DerivedQuantizationSpec, @@ -154,78 +160,120 @@ def get_supported_operators(cls) -> list[OperatorConfig]: # Quantization Specification used by Neutron NPU -act_qspec = QuantizationSpec( - dtype=torch.int8, - quant_min=-128, - quant_max=127, - qscheme=torch.per_tensor_affine, - is_dynamic=False, - observer_or_fake_quant_ctr=HistogramObserver.with_args(eps=2**-12), -) - -wgt_qspec = QuantizationSpec( - dtype=torch.int8, - quant_min=-127, - quant_max=127, - qscheme=torch.per_tensor_symmetric, - is_dynamic=False, - observer_or_fake_quant_ctr=MinMaxObserver, - ch_axis=0, -) +def act_qspec(is_qat: bool): + eps = 2**-12 + observer_or_fake_quant_ctr = ( + FusedMovingAvgObsFakeQuantize.with_args( + observer=MovingAverageMinMaxObserver, eps=eps + ) + if is_qat + else HistogramObserver.with_args(eps=eps) + ) + + return QuantizationSpec( + dtype=torch.int8, + quant_min=-128, + quant_max=127, + qscheme=torch.per_tensor_affine, + is_dynamic=False, + observer_or_fake_quant_ctr=observer_or_fake_quant_ctr, + ) + + +def wgt_qspec(is_qat: bool): + observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAverageMinMaxObserver) + if is_qat + else MinMaxObserver + ) + + return QuantizationSpec( + dtype=torch.int8, + quant_min=-127, + quant_max=127, + qscheme=torch.per_tensor_symmetric, + is_dynamic=False, + observer_or_fake_quant_ctr=observer_or_fake_quant_ctr, + ch_axis=0, + ) + + +def wgt_fc_qspec(is_qat: bool): + observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAverageMinMaxObserver) + if is_qat + else MinMaxObserver + ) + + return QuantizationSpec( + dtype=torch.int8, + quant_min=-127, + quant_max=127, + qscheme=torch.per_tensor_symmetric, + is_dynamic=False, + observer_or_fake_quant_ctr=observer_or_fake_quant_ctr, + ) -wgt_fc_qspec = QuantizationSpec( - dtype=torch.int8, - quant_min=-127, - quant_max=127, - qscheme=torch.per_tensor_symmetric, - is_dynamic=False, - observer_or_fake_quant_ctr=MinMaxObserver, -) # Is set by the *PatternQuantizer directly. 
bias_qspec = None class NeutronQuantizer(ComposableQuantizer): - def __init__(self, neutron_target_spec: NeutronTargetSpec): + def __init__(self, neutron_target_spec: NeutronTargetSpec, is_qat: bool = False): self.neutron_target_spec = neutron_target_spec - static_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_qspec, None) - static_fc_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_fc_qspec, None) + self.is_qat = is_qat + + static_qconfig = QuantizationConfig( + act_qspec(is_qat=is_qat), + act_qspec(is_qat=is_qat), + wgt_qspec(is_qat=is_qat), + None, + ) + static_fc_qconfig = QuantizationConfig( + act_qspec(is_qat=is_qat), + act_qspec(is_qat=is_qat), + wgt_fc_qspec(is_qat=is_qat), + None, + ) + + OpQuantizer = NeutronAtenQuantizer super().__init__( [ - NeutronAtenQuantizer(AbsPattern(), static_qconfig), - NeutronAtenQuantizer(AdaptiveAvgPoolPattern(), static_qconfig), - NeutronAtenQuantizer(AddTensorPattern(), static_qconfig), - NeutronAtenQuantizer(AddmmPattern(self), static_fc_qconfig), - NeutronAtenQuantizer(AvgPoolPattern(), static_qconfig), - NeutronAtenQuantizer(CatPattern(), static_qconfig), - NeutronAtenQuantizer(Conv1dPattern(), static_qconfig), - NeutronAtenQuantizer(Conv2dPattern(self), static_qconfig), - NeutronAtenQuantizer(ConvTranspose2dPattern(), static_qconfig), - NeutronAtenQuantizer(DropoutPattern(), static_qconfig), - NeutronAtenQuantizer(FlattenPattern(), static_qconfig), - NeutronAtenQuantizer(HardTanhPattern(), static_qconfig), - NeutronAtenQuantizer(HardTanhInPlacePattern(), static_qconfig), - NeutronAtenQuantizer(LinearPattern(self), static_fc_qconfig), - NeutronAtenQuantizer(MaxPoolPattern(), static_qconfig), - NeutronAtenQuantizer(MeanDimPattern(), static_qconfig), - NeutronAtenQuantizer(MmPattern(self), static_qconfig), - NeutronAtenQuantizer(MulTensorPattern(), static_qconfig), - NeutronAtenQuantizer(PadPattern(), static_qconfig), - NeutronAtenQuantizer(PermutePattern(), static_qconfig), - NeutronAtenQuantizer(ReluPattern(), static_qconfig), - NeutronAtenQuantizer(ReluInPlacePattern(), static_qconfig), - NeutronAtenQuantizer(ReshapePattern(), static_qconfig), - NeutronAtenQuantizer(SigmoidPattern(), static_qconfig), - NeutronAtenQuantizer(SliceTensorPattern(), static_qconfig), - NeutronAtenQuantizer(SoftMaxPattern(), static_qconfig), - NeutronAtenQuantizer(SubTensorPattern(), static_qconfig), - NeutronAtenQuantizer(TanhPattern(), static_qconfig), - NeutronAtenQuantizer(TanhInPlacePattern(), static_qconfig), - NeutronAtenQuantizer(TransposeIntPattern(), static_qconfig), - NeutronAtenQuantizer(ViewPattern(), static_qconfig), + OpQuantizer(AbsPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(AdaptiveAvgPoolPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(AddTensorPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(AddmmPattern(self, is_qat=is_qat), static_fc_qconfig), + OpQuantizer(AvgPoolPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(CatPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(Conv1dPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(Conv2dPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(ConvTranspose2dPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(DropoutPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(FlattenPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(HardTanhPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(HardTanhInPlacePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(LinearPattern(self, is_qat=is_qat), static_fc_qconfig), + 
OpQuantizer(MaxPoolPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MeanDimPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MmPattern(self, is_qat=is_qat), static_qconfig), + OpQuantizer(MulTensorPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(PadPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(PermutePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(ReluPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(ReluInPlacePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(ReshapePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(SigmoidPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(SliceTensorPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(SoftMaxPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(SubTensorPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(TanhPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(TanhInPlacePattern(is_qat=is_qat), static_qconfig), + OpQuantizer(TransposeIntPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(ViewPattern(is_qat=is_qat), static_qconfig), ] ) + # Mapping ops defined in quantizer partition types to its quantizer self.op_to_quantizer = { pt: q for q in self.quantizers for pt in q.pattern.partition_types() @@ -235,7 +283,9 @@ def __init__(self, neutron_target_spec: NeutronTargetSpec): pt: False for q in self.quantizers for pt in q.pattern.partition_types() } self.cluster_quantizers = [ - NeutronAtenQuantizer(ActivationsConcatClusterPattern(self), static_qconfig) + NeutronAtenQuantizer( + ActivationsConcatClusterPattern(self, is_qat=is_qat), static_qconfig + ) ] def transform_for_annotation( @@ -288,7 +338,7 @@ def _annotate_inputs(self, model: fx.GraphModule): continue if node.op == "placeholder" and len(node.users) > 0: - _annotate_output_qspec(node, act_qspec) + _annotate_output_qspec(node, act_qspec(self.is_qat)) self._mark_input_node_as_annotated(node) def validate(self, model: torch.fx.GraphModule) -> None: diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 3bf0afa3a7d..e8f247d4bbc 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -14,7 +14,11 @@ from torch import fx from torch._ops import OpOverload from torch.fx import Node -from torchao.quantization.pt2e import PerChannelMinMaxObserver +from torchao.quantization.pt2e import ( + FakeQuantize, + MovingAveragePerChannelMinMaxObserver, + PerChannelMinMaxObserver, +) from torchao.quantization.pt2e.quantizer import ( DerivedQuantizationSpec, FixedQParamsQuantizationSpec, @@ -59,7 +63,8 @@ class PartitionAnchors: | tuple[fx.Node, NodeArgsIdx, SharedQuantizationSpec], ] = field(default_factory=list) weights: list[ - tuple[fx.Node, NodeArgsIdx] | tuple[fx.Node, NodeArgsIdx, QuantizationSpec], + tuple[fx.Node, NodeArgsIdx] + | tuple[fx.Node, NodeArgsIdx, QuantizationSpec | FakeQuantize], ] = field(default_factory=list) biases: list[ tuple[fx.Node, NodeArgsIdx] @@ -69,12 +74,18 @@ class PartitionAnchors: literals: list[tuple[fx.Node, NodeArgsIdx]] = field(default_factory=list) output: list[ tuple[fx.Node] - | tuple[fx.Node, FixedQParamsQuantizationSpec | SharedQuantizationSpec], + | tuple[ + fx.Node, + FixedQParamsQuantizationSpec | SharedQuantizationSpec, + ], ] = field(default_factory=list) empty: bool = False class QuantizationPattern(ABC): + def __init__(self, is_qat: bool = False): + self.is_qat = is_qat + @abstractmethod def partition_types(self) -> list[OpOverload]: """ @@ -148,11 +159,12 @@ def get_anchors_for_fixed_quant_specs( zero_point: int, 
quant_min: int = -128, quant_max: int = 127, + is_qat: bool = False, ) -> PartitionAnchors: node = fused_partition[0].nodes[-1] assert len(fused_partition[0].input_nodes) == 1 - qspec = FixedQParamsQuantizationSpec( + qspec_or_fake_quantize = FixedQParamsQuantizationSpec( dtype=torch.int8, scale=scale, zero_point=zero_point, @@ -166,7 +178,7 @@ def get_anchors_for_fixed_quant_specs( weights=[], biases=[], output=[ - (node, qspec), + (node, qspec_or_fake_quantize), ], ) @@ -190,7 +202,9 @@ def partition_types(self): class AddmmPattern(QuantizationPattern): - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info @@ -365,7 +379,11 @@ def get_anchors( ch_axis=0, ) - weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver + weight_observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver) + if self.is_qat + else PerChannelMinMaxObserver + ) weight_quantization_spec = QuantizationSpec( dtype=torch.int8, observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, @@ -399,7 +417,9 @@ def partition_types(self) -> list[OpOverload]: class Conv2dPattern(ConvPattern): - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool = False): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info @@ -426,7 +446,11 @@ def get_anchors( ch_axis=0, ) - weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver + weight_observer_or_fake_quant_ctr = ( + FakeQuantize.with_args(observer=MovingAveragePerChannelMinMaxObserver) + if self.is_qat + else PerChannelMinMaxObserver + ) weight_quantization_spec = QuantizationSpec( dtype=torch.int8, observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, @@ -563,7 +587,9 @@ def replacement_op(self): class LinearPattern(QuantizationPattern): - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool = False): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info @@ -637,7 +663,9 @@ def partition_types(self): class MmPattern(QuantizationPattern): - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool = False): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info @@ -802,7 +830,7 @@ def get_anchors( self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors: return get_anchors_for_fixed_quant_specs( - fused_partition, scale=1.0 / 256.0, zero_point=-128 + fused_partition, scale=1.0 / 256.0, zero_point=-128, is_qat=self.is_qat ) @@ -820,7 +848,7 @@ def get_anchors( self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors: return get_anchors_for_fixed_quant_specs( - fused_partition, scale=1.0 / 256.0, zero_point=-128 + fused_partition, scale=1.0 / 256.0, zero_point=-128, is_qat=self.is_qat ) @@ -838,7 +866,7 @@ def get_anchors( self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors: return get_anchors_for_fixed_quant_specs( - fused_partition, scale=1.0 / 128.0, zero_point=0 + 
fused_partition, scale=1.0 / 128.0, zero_point=0, is_qat=self.is_qat ) @@ -856,7 +884,7 @@ def get_anchors( self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors: return get_anchors_for_fixed_quant_specs( - fused_partition, scale=1.0 / 128.0, zero_point=0 + fused_partition, scale=1.0 / 128.0, zero_point=0, is_qat=self.is_qat ) @@ -884,7 +912,9 @@ class ActivationsConcatClusterPattern(QuantizationPattern): │ """ - def __init__(self, neutron_quantizer): + def __init__(self, neutron_quantizer, is_qat: bool = False): + super().__init__(is_qat=is_qat) + self.neutron_quantizer = neutron_quantizer self.neutron_target_info = ( self.neutron_quantizer.neutron_target_spec.neutron_target_info diff --git a/backends/nxp/quantizer/utils.py b/backends/nxp/quantizer/utils.py index 389526111cb..6dc58e8114a 100644 --- a/backends/nxp/quantizer/utils.py +++ b/backends/nxp/quantizer/utils.py @@ -15,13 +15,18 @@ import torch from torch import fx from torch._ops import OpOverload +from torch.ao.quantization import move_exported_model_to_eval from torch.export import ExportedProgram from torch.fx.passes.utils.source_matcher_utils import ( check_subgraphs_connected, SourcePartition, ) from torchao.quantization.pt2e import ObserverOrFakeQuantize -from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e +from torchao.quantization.pt2e.quantize_pt2e import ( + convert_pt2e, + prepare_pt2e, + prepare_qat_pt2e, +) from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY, Quantizer @@ -154,10 +159,11 @@ def find_sequential_partitions_aten( return fused_partitions -def post_training_quantize( +def calibrate_and_quantize( model: ExportedProgram | fx.GraphModule, calibration_inputs: Iterable[tuple[torch.Tensor, ...]], quantizer: Quantizer, + is_qat: bool = False, ) -> fx.GraphModule: """Quantize the provided model. @@ -165,6 +171,8 @@ def post_training_quantize( :param calibration_inputs: Either a tuple of calibration input tensors where each element corresponds to a model input. Or an iterator over such tuples. :param quantizer: Quantizer to use. + :param is_qat: Whether quantization is done using Quantization Aware Training (QAT) or not. + Note: In QAT mode, training is not performed. Only calibration (in eval mode) is done. :return: Quantized GraphModule. 
""" @@ -172,7 +180,12 @@ def post_training_quantize( if isinstance(model, ExportedProgram): model = model.module() - m = prepare_pt2e(model, quantizer) + if is_qat: + m = prepare_qat_pt2e(model, quantizer) + m = move_exported_model_to_eval(m) + else: + m = prepare_pt2e(model, quantizer) + for data in calibration_inputs: m(*data) m = convert_pt2e(m) diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index 76cfd5fa24b..61af7b5c67f 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -26,7 +26,7 @@ from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.exir import ( EdgeCompileConfig, EdgeProgramManager, @@ -59,8 +59,8 @@ def get_random_calibration_inputs( ] -def _get_default_quantizer(target_spec: NeutronTargetSpec) -> Quantizer: - return NeutronQuantizer(target_spec) +def _get_default_quantizer(target_spec: NeutronTargetSpec, use_qat: bool) -> Quantizer: + return NeutronQuantizer(target_spec, is_qat=use_qat) def to_model_input_spec( @@ -93,6 +93,7 @@ def to_quantized_edge_program( ] = get_random_calibration_inputs, target="imxrt700", neutron_converter_flavor=neutron_converter_flavor, + use_qat=False, remove_quant_io_ops=False, custom_delegation_options=CustomDelegationOptions(), # noqa B008 get_quantizer_fn=None, @@ -100,7 +101,9 @@ def to_quantized_edge_program( ) -> EdgeProgramManager: _neutron_target_spec = NeutronTargetSpec(target, neutron_converter_flavor) if get_quantizer_fn is None: - get_quantizer_fn = partial(_get_default_quantizer, _neutron_target_spec) + get_quantizer_fn = partial( + _get_default_quantizer, _neutron_target_spec, use_qat + ) calibration_inputs = get_calibration_inputs_fn(to_model_input_spec(input_spec)) example_input = calibration_inputs[0] @@ -110,10 +113,11 @@ def to_quantized_edge_program( exir_program_aten = torch.export.export(model, example_input, strict=True) - exir_program_aten__module_quant = post_training_quantize( - exir_program_aten, - calibration_inputs, - get_quantizer_fn(), + exir_program_aten__module_quant = calibrate_and_quantize( + model=exir_program_aten, + calibration_inputs=calibration_inputs, + quantizer=get_quantizer_fn(), + is_qat=use_qat, ) compile_spec = generate_neutron_compile_spec( @@ -150,11 +154,13 @@ def to_quantized_edge_program( def to_quantized_executorch_program( model: torch.nn.Module, input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] 
| list[tuple[int, ...]], + use_qat: bool = False, use_neutron_for_format_conversion: bool = True, ) -> ExecutorchProgramManager: edge_program_manager = to_quantized_edge_program( model, input_spec, + use_qat=use_qat, use_neutron_for_format_conversion=use_neutron_for_format_conversion, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py index 96b9abfe117..2e9a1b393ff 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_abs_converter.py @@ -20,6 +20,7 @@ from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -63,13 +64,13 @@ def forward(self, x): return x.abs() -def test_conv_abs(mocker, input_shape: tuple[int] = (1, 3, 112, 112)): +def test_conv_abs(mocker, use_qat, input_shape: tuple[int] = (1, 3, 112, 112)): model = ConvBlocksWithAbs(conv_in_channels=input_shape[1]) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return diff --git a/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py index a80d2014487..db5cbdcbb5e 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_adaptive_avg_pool2d_converter.py @@ -16,6 +16,7 @@ AdaptiveAvgPool2dConvModule, ) from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -40,7 +41,7 @@ def reseed_model_per_test_run(): ], ) def test_adaptive_avg_pool_2d_delegated_quant_conversion( - mocker, input_shape, output_size + mocker, input_shape, output_size, use_qat ): model = AdaptiveAvgPool2dConvModule(output_size) @@ -48,7 +49,7 @@ def test_adaptive_avg_pool_2d_delegated_quant_conversion( # Run conversion edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() nodes = [str(node) for node in edge_program.graph.nodes] @@ -86,7 +87,7 @@ def test_adaptive_avg_pool_2d_delegated_quant_conversion( ], ) def test_adaptive_avg_pool_2d_non_delegated_quant_conversion( - mocker, input_shape, output_size + mocker, input_shape, output_size, use_qat ): model = AdaptiveAvgPool2dConvModule(output_size) @@ -94,7 +95,7 @@ def test_adaptive_avg_pool_2d_non_delegated_quant_conversion( # Run conversion edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() nodes = list(edge_program.graph.nodes) @@ -119,7 +120,7 @@ def test_adaptive_avg_pool_2d_non_delegated_quant_conversion( ) -def test_adaptive_avg_pool_2d_mean_dim_quant_conversion(mocker): +def test_adaptive_avg_pool_2d_mean_dim_quant_conversion(mocker, use_qat): input_shape = (1, 4, 16, 16) model = 
AdaptiveAvgPool2dConvMeanDimModule() @@ -127,7 +128,7 @@ def test_adaptive_avg_pool_2d_mean_dim_quant_conversion(mocker): # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture generated model diff --git a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py index 02e799723d4..1aa58ab5d95 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_add_tensor_converter.py @@ -21,6 +21,7 @@ AddTensorOneInputModule, ) from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -38,13 +39,13 @@ def reseed_model_per_test_run(): pytest.param((1, 4, 8, 8), id="4D."), ], ) -def test_add_tensor_quant_conversion(mocker, input_shape): +def test_add_tensor_quant_conversion(mocker, input_shape, use_qat): model = AddTensorModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(model, [input_shape, input_shape]) + _ = to_quantized_edge_program(model, [input_shape, input_shape], use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -69,13 +70,13 @@ def test_add_tensor_quant_conversion(mocker, input_shape): pytest.param((1, 4, 8, 8), id="4D."), ], ) -def test_add_tensor_one_input_quant_conversion(mocker, input_shape): +def test_add_tensor_one_input_quant_conversion(mocker, input_shape, use_qat): model = AddTensorOneInputModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -97,14 +98,14 @@ def test_add_tensor_one_input_quant_conversion(mocker, input_shape): pytest.param((1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."), ], ) -def test_add_tensor_w_conv_quant_conversion(mocker, input_shape): +def test_add_tensor_w_conv_quant_conversion(mocker, input_shape, use_qat): model = AddTensorConvModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture generated model @@ -137,13 +138,13 @@ def test_add_tensor_w_conv_quant_conversion(mocker, input_shape): ], ) def test_add_tensor_broadcasting_unsupported_quant_conversion( - x_input_shape, y_input_shape + x_input_shape, y_input_shape, use_qat ): model = AddTensorModule() # Run conversion edge_program = to_quantized_edge_program( - model, [x_input_shape, y_input_shape] + model, [x_input_shape, y_input_shape], use_qat=use_qat ).exported_program() nodes = list(edge_program.graph.nodes) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py index a6f5ef8c93b..a8cdee41830 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_addmm_converter.py @@ -19,6 +19,7 @@ ) from 
executorch.backends.nxp.tests.models import AddmmModule, LinearModule from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized from torch.export import ExportedProgram @@ -28,7 +29,8 @@ def setUpClass(cls): torch.manual_seed(23) np.random.seed(42) - def test_addmm_conversion(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_addmm_conversion(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -38,7 +40,7 @@ def test_addmm_conversion(self): model = AddmmModule(input_shape[1]) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. @@ -60,7 +62,8 @@ def test_addmm_conversion(self): tfl_model=tflite_flatbuffers_model, ) - def test_linear_conversion__with_bias(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_linear_conversion__with_bias(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -70,7 +73,7 @@ def test_linear_conversion__with_bias(self): model = LinearModule(bias=True) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. diff --git a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py index 7aed0236043..b6083d1e816 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_avg_pool2d_converter.py @@ -28,6 +28,7 @@ ) from executorch.backends.nxp.tests.models import AvgPool2dConvModule, AvgPool2dModule from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -143,14 +144,16 @@ def test_avg_pool_2d_conversion(input_shape, padding, count_include_pad): ), ], ) -def test_avg_pool_2d_quant_conversion(mocker, input_shape, padding, count_include_pad): +def test_avg_pool_2d_quant_conversion( + mocker, input_shape, padding, count_include_pad, use_qat +): model = AvgPool2dConvModule(padding=padding, count_include_pad=count_include_pad) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture generated model @@ -170,7 +173,7 @@ def test_avg_pool_2d_quant_conversion(mocker, input_shape, padding, count_includ ) -def test_avg_pool_2d_quant_conversion__padded(mocker): +def test_avg_pool_2d_quant_conversion__padded(mocker, use_qat): input_shape = (1, 8, 8, 8) model = AvgPool2dModule(True, 1) @@ -179,7 +182,7 @@ def test_avg_pool_2d_quant_conversion__padded(mocker): # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture the converter operators. 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py index 590b0be6a6b..e3ee2fff90b 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py @@ -22,6 +22,7 @@ ) from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 def _normalized_dim(dim, rank): @@ -84,13 +85,13 @@ def forward(self, *inputs: torch.Tensor): pytest.param(4, 5, -3, id="4D, 5 inputs, dim=-3"), ], ) -def test_cat__same_shapes(dim, num_inputs, rank, mocker): +def test_cat__same_shapes(dim, num_inputs, rank, mocker, use_qat): input_shape = tuple([8, 8, 8, 8][:rank]) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - CatModule(dim), [input_shape] * num_inputs + CatModule(dim), [input_shape] * num_inputs, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -115,13 +116,13 @@ def test_cat__same_shapes(dim, num_inputs, rank, mocker): @pytest.mark.parametrize("dim", [3, -2, -3]) @pytest.mark.parametrize("num_inputs", [2, 5]) -def test_cat__channels_first__same_shapes(dim, num_inputs, mocker): +def test_cat__channels_first__same_shapes(dim, num_inputs, mocker, use_qat): input_shape = (2, 8, 6, 8) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") channels = input_shape[1] if dim not in {1, -3} else input_shape[1] * num_inputs quantized_program = to_quantized_edge_program( - CatConvModule(dim, channels), [input_shape] * num_inputs + CatConvModule(dim, channels), [input_shape] * num_inputs, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -158,13 +159,13 @@ def test_cat__channels_first__same_shapes(dim, num_inputs, mocker): pytest.param(-2, (1, 1, 1, 8), id="axis = -2"), ], ) -def test_cat__unsupported__imxrt700(dim, input_shape): +def test_cat__unsupported__imxrt700(dim, input_shape, use_qat): """This test is conjoined with the one below (`test_cat__context_dependent__imxrt700`). In this case, the inputs of the `cat` are NOT compute ops, so the `cat` is NOT delegated. """ num_inputs = 2 quantized_program = to_quantized_edge_program( - CatModule(dim), [input_shape] * num_inputs, target="imxrt700" + CatModule(dim), [input_shape] * num_inputs, target="imxrt700", use_qat=use_qat ).exported_program() # Make sure the `Cat` was NOT delegated. @@ -188,13 +189,16 @@ def test_cat__unsupported__imxrt700(dim, input_shape): pytest.param(-2, (1, 1, 1, 8), id="axis = -2"), ], ) -def test_cat__context_dependent__imxrt700(dim, input_shape): +def test_cat__context_dependent__imxrt700(dim, input_shape, use_qat): """This test is conjoined with the one above (`test_cat__unsupported__imxrt700`). In this case, the inputs of the `cat` are compute ops, so the `cat` is delegated. """ num_inputs = 2 ep = to_quantized_edge_program( - AddCatModule(dim), [input_shape] * num_inputs, target="imxrt700" + AddCatModule(dim), + [input_shape] * num_inputs, + target="imxrt700", + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was delegated. 
@@ -218,7 +222,7 @@ def test_cat__context_dependent__imxrt700(dim, input_shape): pytest.param(4, 5, -3, id="4D, 5 inputs, dim=-3"), ], ) -def test_cat__different_shapes(dim, num_inputs, rank, mocker): +def test_cat__different_shapes(dim, num_inputs, rank, mocker, use_qat): input_shape = tuple([2, 8, 8, 8, 8][-rank:]) # The shape of every input will be different along the concatenated dimension. @@ -231,7 +235,7 @@ def test_cat__different_shapes(dim, num_inputs, rank, mocker): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - CatModule(dim), input_shapes + CatModule(dim), input_shapes, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -258,7 +262,7 @@ def test_cat__different_shapes(dim, num_inputs, rank, mocker): @pytest.mark.parametrize( "num_inputs", [2, 5], ids=lambda num_inputs: f"num_inputs = {num_inputs}" ) -def test_cat__channels_first__different_shapes(dim, num_inputs, mocker): +def test_cat__channels_first__different_shapes(dim, num_inputs, mocker, use_qat): input_shape = (2, 8, 6, 8) # The shape of every input will be different along the concatenated dimension. @@ -276,7 +280,7 @@ def test_cat__channels_first__different_shapes(dim, num_inputs, mocker): sum(shape[1] for shape in input_shapes) if dim in [1, -3] else input_shape[1] ) quantized_program = to_quantized_edge_program( - CatConvModule(dim, channels), input_shapes + CatConvModule(dim, channels), input_shapes, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -301,7 +305,7 @@ def test_cat__channels_first__different_shapes(dim, num_inputs, mocker): ) -def test_cat__different_shapes__unsupported_channels__imxrt700(): +def test_cat__different_shapes__unsupported_channels__imxrt700(use_qat): input_shape = (2, 4, 6, 7) # (channels % 8) != 0 num_inputs = 2 @@ -315,7 +319,7 @@ def test_cat__different_shapes__unsupported_channels__imxrt700(): input_shapes.append(tuple(tmp_shape)) quantized_program = to_quantized_edge_program( - CatModule(dim), input_shapes, target="imxrt700" + CatModule(dim), input_shapes, target="imxrt700", use_qat=use_qat ).exported_program() # Make sure the `Cat` was NOT delegated. @@ -327,7 +331,7 @@ def test_cat__different_shapes__unsupported_channels__imxrt700(): ) -def test_cat__force_delegate(): +def test_cat__force_delegate(use_qat): target = "imxrt700" # The Partitioner doesn't know if the `8` or the `1` will become the channels in the IR. Therefore, it would @@ -339,6 +343,7 @@ def test_cat__force_delegate(): [input_shape, input_shape], target=target, custom_delegation_options=CustomDelegationOptions(force_delegate_cat=True), + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was delegated. @@ -348,7 +353,7 @@ def test_cat__force_delegate(): assert any("lowered_module" in node.name for node in quantized_program.graph.nodes) -def test_cat__same_shapes_converter_padding_last_dimension(): +def test_cat__same_shapes_converter_padding_last_dimension(use_qat): target = "imxrt700" # The Converter is capable of padding the last dimension of `cat` with the same input shapes. @@ -360,6 +365,7 @@ def test_cat__same_shapes_converter_padding_last_dimension(): target=target, neutron_converter_flavor="SDK_25_09", custom_delegation_options=CustomDelegationOptions(), + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was delegated. 
@@ -369,7 +375,7 @@ def test_cat__same_shapes_converter_padding_last_dimension(): assert any("lowered_module" in node.name for node in quantized_program.graph.nodes) -def test_cat__same_shapes__channels_first__padding_channels(): +def test_cat__same_shapes__channels_first__padding_channels(use_qat): target = "imxrt700" # The Converter is capable of padding the last dimension of `cat` with the same input shapes. @@ -381,6 +387,7 @@ def test_cat__same_shapes__channels_first__padding_channels(): target=target, neutron_converter_flavor="SDK_25_09", custom_delegation_options=CustomDelegationOptions(), + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was delegated. @@ -390,7 +397,7 @@ def test_cat__same_shapes__channels_first__padding_channels(): assert any("lowered_module" in node.name for node in quantized_program.graph.nodes) -def test_cat__same_shapes_converter_padding_middle_dimension(): +def test_cat__same_shapes_converter_padding_middle_dimension(use_qat): target = "imxrt700" # The Converter is not capable of padding the middle dimensions of `cat` with the same input shapes. @@ -401,6 +408,7 @@ def test_cat__same_shapes_converter_padding_middle_dimension(): [input_shape, input_shape], target=target, custom_delegation_options=CustomDelegationOptions(), + use_qat=use_qat, ).exported_program() # Make sure the `Cat` was NOT delegated. @@ -412,7 +420,7 @@ def test_cat__same_shapes_converter_padding_middle_dimension(): ) -def test_cat__format_specific_support__formatless(mocker): +def test_cat__format_specific_support__formatless(mocker, use_qat): # The last dim will end up being the channels, as the format is `formatless`. # Only the last dim satisfies the Neutron requirements for the channels. input_shape = (3, 3, 3, 8) @@ -424,7 +432,7 @@ def test_cat__format_specific_support__formatless(mocker): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - CatModule(dim), input_shapes + CatModule(dim), input_shapes, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. @@ -447,7 +455,7 @@ def test_cat__format_specific_support__formatless(mocker): ) -def test_cat__format_specific_support__channels_first(mocker): +def test_cat__format_specific_support__channels_first(mocker, use_qat): # The second dim will end up being the channels, as the format is `formatless`. # Only the second dim satisfies the Neutron requirements for the channels. input_shape = (3, 8, 3, 3) @@ -462,7 +470,7 @@ def test_cat__format_specific_support__channels_first(mocker): sum(shape[1] for shape in input_shapes) if dim in [1, -3] else input_shape[1] ) quantized_program = to_quantized_edge_program( - CatConvModule(dim, channels), input_shapes + CatConvModule(dim, channels), input_shapes, use_qat=use_qat ).exported_program() # Make sure the `Cat` was delegated. 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py index 427ddaf14a5..250ddb88212 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_clone_converter.py @@ -100,9 +100,15 @@ def target_can_be_clone(node): return node in clone_ops or target_can_be_clone(node) @parameterized.expand( - list(itertools.product([True, False], [(1, 3, 128, 128), (1, 3, 256, 256)])) + list( + itertools.product( + [True, False], [(1, 3, 128, 128), (1, 3, 256, 256)], [True, False] + ) + ) ) - def test_conv_dropout_quant(self, inplace_dropout: bool, input_shape: tuple[int]): + def test_conv_dropout_quant( + self, inplace_dropout: bool, input_shape: tuple[int], use_qat: bool + ): model = SingleConvBlockWithDropout( conv_in_channels=input_shape[1], perform_inplace_dropout=inplace_dropout ).eval() @@ -113,7 +119,10 @@ def test_conv_dropout_quant(self, inplace_dropout: bool, input_shape: tuple[int] owner=EdgeProgramToIRConverter, ) as converter_spy: quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ).exported_program() tflite_flatbuffers_model, _ = converter_spy.calls[-1].return_value @@ -157,7 +166,10 @@ def test_conv_dropout_no_quant( # Clone with inplace=True should not produce clone edge op and vice versa assert inplace_dropout ^ has_clone - def test_clone_pool_view_copy_quant(self, input_shape: tuple[int] = (1, 64, 25, 5)): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_clone_pool_view_copy_quant( + self, _, use_qat: bool, input_shape: tuple[int] = (1, 64, 25, 5) + ): model = KWSFinalBlock(input_shape).eval() with kgb.spy_on( @@ -166,7 +178,7 @@ def test_clone_pool_view_copy_quant(self, input_shape: tuple[int] = (1, 64, 25, owner=EdgeProgramToIRConverter, ) as converter_spy: quantized_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() tflite_flatbuffers_model, _ = converter_spy.calls[-1].return_value diff --git a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py index bd1f894001c..a2c9526a508 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py @@ -22,6 +22,7 @@ ConstantPadNDConvModule, ConstantPadNDModule, ) +from executorch.backends.nxp.tests.use_qat import * # noqa F403 from executorch.exir.dialects._ops import ops as exir_ops @@ -120,20 +121,24 @@ def test_constant_pad_nd_conversion__channels_first(input_shape, paddings): pytest.param((1, 1, 6, 8), (1, 2, 3, 4, 2, 1), id="4D, padding C, H, W"), ], ) -def test_constant_pad_nd__unsupported_paddings(input_shape, paddings): +def test_constant_pad_nd__unsupported_paddings(input_shape, paddings, use_qat): model = ConstantPadNDModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(exec_program.graph.nodes) # There is at least one non-delegated Pad node assert any(node.name == "aten_constant_pad_nd_default" for node in nodes) -def 
test_constant_pad_nd__delegation__formatless__supported_padding(): +def test_constant_pad_nd__delegation__formatless__supported_padding(use_qat): input_shape = (2, 4, 6, 8) # Formatless -> the last dim (8) will be padded. paddings = [0, 0, 1, 2, 3, 4] # The last dim is padded using the first 2 paddings. model = ConstantPadNDModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `pad` was delegated. assert not graph_contains_any_of_ops( @@ -141,11 +146,13 @@ def test_constant_pad_nd__delegation__formatless__supported_padding(): ) -def test_constant_pad_nd__delegation__formatless__unsupported_padding(): +def test_constant_pad_nd__delegation__formatless__unsupported_padding(use_qat): input_shape = (2, 4, 6, 8) # Formatless -> the last dim (8) will be padded. paddings = [0, 1] # The last dim is padded using the first 2 paddings. model = ConstantPadNDModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `pad` was NOT delegated. assert graph_contains_any_of_ops( @@ -153,11 +160,13 @@ def test_constant_pad_nd__delegation__formatless__unsupported_padding(): ) -def test_constant_pad_nd__delegation__channels_first__supported_padding(): +def test_constant_pad_nd__delegation__channels_first__supported_padding(use_qat): input_shape = (2, 4, 6, 8) # Channels first -> the second dim (4) will be padded. paddings = [1, 2, 3, 4, 0, 0] # The second dim is padded using the paddings[4:6]. model = ConstantPadNDConvModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `pad` was delegated. assert not graph_contains_any_of_ops( @@ -165,11 +174,13 @@ def test_constant_pad_nd__delegation__channels_first__supported_padding(): ) -def test_constant_pad_nd__delegation__channels_first__unsupported_padding(): +def test_constant_pad_nd__delegation__channels_first__unsupported_padding(use_qat): input_shape = (2, 3, 6, 8) # Channels first -> the second dim (3) will be padded. paddings = [0, 0, 0, 0, 1, 0] # The second dim is padded using the paddings[4:6]. model = ConstantPadNDConvModule(paddings) - exec_program = to_quantized_edge_program(model, input_shape).exported_program() + exec_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `pad` was NOT delegated. 
assert graph_contains_any_of_ops( diff --git a/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py index 0fabbf615c9..56fdf1a2e0c 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_conv_converter.py @@ -30,6 +30,7 @@ from executorch.backends.nxp.tests.models import Conv1dModule, Conv2dModule from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -42,7 +43,7 @@ def reseed_model_per_test_run(): @pytest.mark.parametrize("stride", [1, 2]) @pytest.mark.parametrize("dilation", [2, 1]) @pytest.mark.parametrize("kernel_size", [(1,), (3,)]) -def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker): +def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker, use_qat): input_shape = (1, 4, 16) model = Conv1dModule( bias=bias, stride=stride, dilation=dilation, kernel_size=kernel_size @@ -51,7 +52,7 @@ def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker): ops_spy = mocker.spy(ModelBuilder, "finish") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -96,7 +97,7 @@ def test_conv1d_quant_conversion(bias, stride, dilation, kernel_size, mocker): ) @pytest.mark.parametrize("padding", [(1,), 2]) def test_conv1d_quant_conversion__padded( - stride, dilation, kernel_size, padding, mocker + stride, dilation, kernel_size, padding, mocker, use_qat ): input_shape = (1, 4, 16) model = Conv1dModule( @@ -106,7 +107,7 @@ def test_conv1d_quant_conversion__padded( ops_spy = mocker.spy(ModelBuilder, "finish") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -153,7 +154,7 @@ def test_conv1d_quant_conversion__padded( @pytest.mark.parametrize("dilation", [2, 1]) @pytest.mark.parametrize("kernel_size", [(1,), (3,)]) def test_conv1d_quant_conversion__depthwise( - bias, stride, dilation, kernel_size, mocker + bias, stride, dilation, kernel_size, mocker, use_qat ): input_shape = (1, 4, 16) group = input_shape[1] @@ -170,7 +171,7 @@ def test_conv1d_quant_conversion__depthwise( ops_spy = mocker.spy(ModelBuilder, "finish") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -214,7 +215,7 @@ def test_conv1d_quant_conversion__depthwise( ) @pytest.mark.parametrize("padding", [(1,), 2]) def test_conv1d_quant_conversion__depthwise__padded( - stride, dilation, kernel_size, padding, mocker + stride, dilation, kernel_size, padding, mocker, use_qat ): input_shape = (1, 4, 16) group = input_shape[1] @@ -231,7 +232,7 @@ def test_conv1d_quant_conversion__depthwise__padded( ops_spy = mocker.spy(ModelBuilder, "finish") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = 
converter_spy.spy_return @@ -401,12 +402,12 @@ def test_conv1d_quant_conversion__depthwise__padded( ), ], ) -def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape): +def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) # Capture generated model @@ -432,7 +433,7 @@ def test_conv2d_quant_conversion(mocker, model: torch.nn.Module, input_shape): @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.parametrize("kernel_shape", [[1, 2], [3, 3], [4, 1]]) def test_conv2d_conversion__depthwise__quantized( - bias, stride, dilation, kernel_shape, mocker + bias, stride, dilation, kernel_shape, mocker, use_qat ): input_shape = (1, 4, 12, 12) group = input_shape[1] @@ -449,6 +450,7 @@ def test_conv2d_conversion__depthwise__quantized( kernel_size=kernel_shape, ), tuple(input_shape), + use_qat=use_qat, use_neutron_for_format_conversion=False, ).exported_program() @@ -497,7 +499,7 @@ def test_conv2d_conversion__depthwise__padded(padding, mocker): @pytest.mark.parametrize("padding", [1, 2]) -def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker): +def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker, use_qat): input_shape = (1, 4, 12, 12) group = input_shape[1] spy = mocker.spy(ModelBuilder, "finish") @@ -507,6 +509,7 @@ def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker): group=group, in_channels=group, out_channels=group, padding=padding ), tuple(input_shape), + use_qat=use_qat, use_neutron_for_format_conversion=False, ).exported_program() @@ -580,12 +583,12 @@ def test_conv2d_conversion__depthwise__padded__quantized(padding, mocker): ], ) def test_conv_transpose2d_conversion__quantized( - mocker, model: torch.nn.Module, input_shape + mocker, model: torch.nn.Module, input_shape, use_qat ): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() # Make sure the `TransposeConv` was delegated. 
@@ -664,9 +667,11 @@ def test_conv_transpose2d_conversion__quantized( ], ) def test_conv_transpose2d_non_delegated_conversion__quantized( - model: torch.nn.Module, input_shape + model: torch.nn.Module, input_shape, use_qat ): - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(edge_program.graph.nodes) assert len(nodes) == 15 diff --git a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py index dad8ce6a0e3..fb272a2c650 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py @@ -23,6 +23,7 @@ from executorch.backends.nxp.tests.models import Conv2dWithActivation from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -33,7 +34,7 @@ def reseed_model_per_test_run(): @pytest.mark.parametrize("input_shape", [(1, 3, 128, 128)]) @pytest.mark.parametrize("inplace", [True, False]) -def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool): +def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool, use_qat: bool): # The torch.nn.Relu6 inherits from torch.nn.Hardtanh, and hence represented as HardTanh in ATen. # Testing the hardtanh originated from torch.nn.Relu6 op. model = Conv2dWithActivation( @@ -43,7 +44,7 @@ def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -69,7 +70,11 @@ def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool): ) @pytest.mark.parametrize("inplace", [True, False]) def test_custom_hardtanh_quant( - mocker, input_shape: tuple[int], activation_range: tuple[int, int], inplace: bool + mocker, + input_shape: tuple[int], + activation_range: tuple[int, int], + inplace: bool, + use_qat: bool, ): # TODO(13063): This test suffers from non-ideal testing random quantization, because we always use range <0,1>. # We should update (decrease atol) when the Conv/Linear + Activation fuse at quantization is in place. 
@@ -82,7 +87,7 @@ def test_custom_hardtanh_quant( converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 8b938ef7fff..569ad571dbc 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -25,6 +25,7 @@ from executorch.backends.xnnpack._passes import RemoveGetItemPass from executorch.exir.verification.verifier import EXIREdgeDialectVerifier from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -103,13 +104,14 @@ def test_max_pool_2d_conversion(input_shape, padding): ), ], ) -def test_max_pool_2d_quant_conversion(mocker, input_shape, padding): +def test_max_pool_2d_quant_conversion(mocker, input_shape, padding, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( MaxPool2dConvModule(padding=padding), input_shape, + use_qat=use_qat, use_neutron_for_format_conversion=False, ) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py index ee69b1ea352..7c0a5e8ffcf 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py @@ -18,6 +18,7 @@ ToChannelLastPreprocess, ) from executorch.backends.nxp.tests.models import MeanDimConvModule, MeanDimLinearModule +from executorch.backends.nxp.tests.use_qat import * # noqa F403 from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram @@ -47,14 +48,16 @@ def forward(self, x): pytest.param((1, 4, 8, 8), (3, 2), id="Dim 3, 2."), ], ) -def test_mean_dim_conv_quant_conversion(mocker, input_shape, dim, keepdim=True): +def test_mean_dim_conv_quant_conversion( + mocker, input_shape, dim, use_qat, keepdim=True +): model = MeanDimConvModule(dim, keepdim) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion ep = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() # Make sure the `mean.dim` was delegated. 
assert not graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) @@ -93,14 +96,16 @@ def test_mean_dim_conv_quant_conversion(mocker, input_shape, dim, keepdim=True): ], ) def test_mean_dim_linear_unsupported_quant_conversion( - mocker, input_shape, dim, keepdim + mocker, input_shape, dim, use_qat, keepdim ): model = MeanDimLinearModule(dim, keepdim) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(edge_program.graph.nodes) # Last 2 dimensions are not used or keepdim is False, cannot be converted to MeanDim, node is not delegated @@ -138,14 +143,16 @@ def test_mean_dim_linear_unsupported_quant_conversion( pytest.param(True, id="Keep dim."), ], ) -def test_mean_dim_conv_unsupported_quant_conversion(mocker, input_shape, dim, keepdim): +def test_mean_dim_conv_unsupported_quant_conversion( + mocker, input_shape, dim, use_qat, keepdim +): model = MeanDimConvModule(dim, keepdim) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() nodes = list(edge_program.graph.nodes) @@ -178,12 +185,16 @@ def test_mean_dim_conv_unsupported_quant_conversion(mocker, input_shape, dim, ke pytest.param((1, 2, 3, 8), (-2, -3), id="Dim -2, -3."), ], ) -def test_mean_dim__formatless__supported(mocker, input_shape, dim, keepdim=True): +def test_mean_dim__formatless__supported( + mocker, input_shape, dim, use_qat, keepdim=True +): model = MeanDimModule(dim, keepdim) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - ep = to_quantized_edge_program(model, input_shape).exported_program() + ep = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `mean.dim` was delegated. assert not graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) @@ -211,10 +222,12 @@ def test_mean_dim__formatless__supported(mocker, input_shape, dim, keepdim=True) pytest.param((1, 2, 3, 8), (2, 3), id="Dim 2, 3."), ], ) -def test_mean_dim__formatless__unsupported(input_shape, dim, keepdim=True): +def test_mean_dim__formatless__unsupported(input_shape, dim, use_qat, keepdim=True): model = MeanDimModule(dim, keepdim) - ep = to_quantized_edge_program(model, input_shape).exported_program() + ep = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `mean.dim` was NOT delegated. assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) @@ -229,10 +242,14 @@ def test_mean_dim__formatless__unsupported(input_shape, dim, keepdim=True): ), ], ) -def test_mean_dim__formatless__unsupported_channels(input_shape, dim, keepdim=True): +def test_mean_dim__formatless__unsupported_channels( + input_shape, dim, use_qat, keepdim=True +): model = MeanDimModule(dim, keepdim) - ep = to_quantized_edge_program(model, input_shape).exported_program() + ep = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `mean.dim` was NOT delegated. 
assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) @@ -247,13 +264,17 @@ def test_mean_dim__formatless__unsupported_channels(input_shape, dim, keepdim=Tr ), ], ) -def test_mean_dim__channels_first__unsupported_channels(input_shape, dim, keepdim=True): +def test_mean_dim__channels_first__unsupported_channels( + input_shape, dim, use_qat, keepdim=True +): model = MeanDimConvModule( dim, keepdim, out_channels=5 ) # Only multiples of 8 (num_macs) are supported. # Run conversion - ep = to_quantized_edge_program(model, input_shape).exported_program() + ep = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure the `mean.dim` was NOT delegated. assert graph_contains_any_of_ops(ep.graph, [exir_ops.edge.aten.mean.dim]) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py index a2b406cdc76..962a4f4b0c1 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_mm_converter.py @@ -19,6 +19,7 @@ ) from executorch.backends.nxp.tests.models import LinearModule, MmModule from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized from torch.export import ExportedProgram @@ -28,7 +29,8 @@ def setUpClass(cls): torch.manual_seed(23) np.random.seed(42) - def test_mm_conversion(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_mm_conversion(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -38,7 +40,7 @@ def test_mm_conversion(self): model = MmModule(input_shape[1]) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. @@ -60,7 +62,8 @@ def test_mm_conversion(self): tfl_model=tflite_flatbuffers_model, ) - def test_linear_conversion__without_bias(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_linear_conversion__without_bias(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -70,7 +73,7 @@ def test_linear_conversion__without_bias(self): model = LinearModule(bias=False) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py index c4fb84dbb60..d32de7241e5 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py @@ -21,6 +21,7 @@ from executorch.exir.dialects._ops import ops as exir_ops from parameterized import parameterized from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 class Conv2dTransposeModule(torch.nn.Module): @@ -112,12 +113,14 @@ def setUpClass(cls): @parameterized.expand( [ - ["To channel first permutation", (1, 16, 8, 8), (0, 3, 1, 2)], - ["To channel last permutation", (1, 16, 8, 8), (0, 2, 3, 1)], + ["QAT; To channel first permutation", (1, 16, 8, 8), (0, 3, 1, 2), True], + ["PTQ; To channel first permutation", (1, 16, 8, 8), (0, 3, 1, 2), False], + ["QAT; To channel last permutation", (1, 16, 8, 8), (0, 2, 3, 1), True], + ["PTQ; To channel last permutation", (1, 16, 8, 8), (0, 2, 3, 1), False], ] ) def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_input( - self, _: str, input_shape, perm + self, _: str, input_shape, perm, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True @@ -126,7 +129,7 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_inp # Run conversion edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure the `Permute_copy` was delegated. @@ -156,12 +159,14 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_inp @parameterized.expand( [ - ["To channel first permutation", (1, 8, 8, 8), (0, 3, 1, 2)], - ["To channel last permutation", (1, 8, 8, 8), (0, 2, 3, 1)], + ["QAT; To channel first permutation", (1, 8, 8, 8), (0, 3, 1, 2), True], + ["PTQ; To channel first permutation", (1, 8, 8, 8), (0, 3, 1, 2), False], + ["QAT; To channel last permutation", (1, 8, 8, 8), (0, 2, 3, 1), True], + ["PTQ; To channel last permutation", (1, 8, 8, 8), (0, 2, 3, 1), False], ] ) def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_output( - self, _: str, input_shape, perm + self, _: str, input_shape, perm, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True @@ -170,7 +175,7 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_out # Run conversion edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure the `Permute_copy` was delegated. @@ -200,14 +205,66 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_out @parameterized.expand( [ - ["nchw->nhwc ... nchw->nhwc", (1, 8, 8, 8), (0, 2, 3, 1), (0, 2, 3, 1)], - ["nchw->nhwc ... nhwc->nchw", (1, 8, 8, 8), (0, 2, 3, 1), (0, 3, 1, 2)], - ["nhwc->nchw ... nhwc->nchw", (1, 8, 8, 8), (0, 3, 1, 2), (0, 3, 1, 2)], - ["nhwc->nchw ... nchw->nhwc", (1, 8, 8, 8), (0, 3, 1, 2), (0, 2, 3, 1)], + [ + "QAT; nchw->nhwc ... nchw->nhwc", + (1, 8, 8, 8), + (0, 2, 3, 1), + (0, 2, 3, 1), + True, + ], + [ + "PTQ; nchw->nhwc ... nchw->nhwc", + (1, 8, 8, 8), + (0, 2, 3, 1), + (0, 2, 3, 1), + False, + ], + [ + "QAT; nchw->nhwc ... nhwc->nchw", + (1, 8, 8, 8), + (0, 2, 3, 1), + (0, 3, 1, 2), + True, + ], + [ + "PTQ; nchw->nhwc ... 
nhwc->nchw", + (1, 8, 8, 8), + (0, 2, 3, 1), + (0, 3, 1, 2), + False, + ], + [ + "QAT; nhwc->nchw ... nhwc->nchw", + (1, 8, 8, 8), + (0, 3, 1, 2), + (0, 3, 1, 2), + True, + ], + [ + "PTQ; nhwc->nchw ... nhwc->nchw", + (1, 8, 8, 8), + (0, 3, 1, 2), + (0, 3, 1, 2), + False, + ], + [ + "QAT; nhwc->nchw ... nchw->nhwc", + (1, 8, 8, 8), + (0, 3, 1, 2), + (0, 2, 3, 1), + True, + ], + [ + "PTQ; nhwc->nchw ... nchw->nhwc", + (1, 8, 8, 8), + (0, 3, 1, 2), + (0, 2, 3, 1), + False, + ], ] ) def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_io( - self, _: str, input_shape, perm1, perm2 + self, _: str, input_shape, perm1, perm2, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True @@ -216,7 +273,7 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_io( # Run conversion edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure the `Permute_copy` was delegated. @@ -246,20 +303,53 @@ def test_permute_copy_conversion__from_permute_4D__quantized__channels_first_io( @parameterized.expand( [ - ["Permutation can be replaced by reshapes", (10, 1, 8), (0, 2, 1)], - ["Permutation can be replaced by reshapes", (10, 1, 1), (2, 1, 0)], - ["Permutation is identical and can be removed", (10, 1, 8), (0, 1, 2)], + [ + "QAT; Permutation can be replaced by reshapes", + (10, 1, 8), + (0, 2, 1), + True, + ], + [ + "PTQ; Permutation can be replaced by reshapes", + (10, 1, 8), + (0, 2, 1), + False, + ], + [ + "QAT; Permutation can be replaced by reshapes", + (10, 1, 1), + (2, 1, 0), + True, + ], + [ + "PTQ; Permutation can be replaced by reshapes", + (10, 1, 1), + (2, 1, 0), + False, + ], + [ + "QAT; Permutation is identical and can be removed", + (10, 1, 8), + (0, 1, 2), + True, + ], + [ + "PTQ; Permutation is identical and can be removed", + (10, 1, 8), + (0, 1, 2), + False, + ], ] ) def test_permute_copy_conversion__from_permute_3D__quantized( - self, _: str, input_shape, perm + self, _: str, input_shape, perm, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True ) as converter_spy: # Run conversion edge_program = to_quantized_edge_program( - LinearPermuteModule(input_shape[2], perm), input_shape + LinearPermuteModule(input_shape[2], perm), input_shape, use_qat=use_qat ).exported_program() # Make sure the `Permute_copy` was delegated. 
@@ -289,17 +379,23 @@ def test_permute_copy_conversion__from_permute_3D__quantized( @parameterized.expand( [ - ["Transpose dims 1 and 2", (1, 16, 8, 8), (0, 2, 1, 3)], - ["To (2, 0, 1, 3) permutation", (1, 16, 8, 8), (2, 0, 1, 3)], - ["To (3, 1, 2, 0) permutation", (1, 16, 8, 8), (3, 1, 2, 0)], - ["To (3, 1, 0, 2) permutation", (1, 16, 8, 8), (3, 1, 0, 2)], + ["QAT; Transpose dims 1 and 2", (1, 16, 8, 8), (0, 2, 1, 3), True], + ["PTQ; Transpose dims 1 and 2", (1, 16, 8, 8), (0, 2, 1, 3), False], + ["QAT; To (2, 0, 1, 3) permutation", (1, 16, 8, 8), (2, 0, 1, 3), True], + ["PTQ; To (2, 0, 1, 3) permutation", (1, 16, 8, 8), (2, 0, 1, 3), False], + ["QAT; To (3, 1, 2, 0) permutation", (1, 16, 8, 8), (3, 1, 2, 0), True], + ["PTQ; To (3, 1, 2, 0) permutation", (1, 16, 8, 8), (3, 1, 2, 0), False], + ["QAT; To (3, 1, 0, 2) permutation", (1, 16, 8, 8), (3, 1, 0, 2), True], + ["PTQ; To (3, 1, 0, 2) permutation", (1, 16, 8, 8), (3, 1, 0, 2), False], ] ) def test_permute_copy_non_delegated_conversion__from_permute_4D__quantized( - self, _: str, input_shape, perm + self, _: str, input_shape, perm, use_qat ): model = Conv2dPermuteModule(input_shape[1], perm) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(edge_program.graph.nodes) assert len(nodes) == 8 @@ -309,15 +405,19 @@ def test_permute_copy_non_delegated_conversion__from_permute_4D__quantized( @parameterized.expand( [ - ["Transpose dims 1 and 2", (1, 16, 8, 8), 1, 2], - ["Transpose dims 2 and 3", (1, 16, 8, 8), 2, 3], + ["QAT; Transpose dims 1 and 2", (1, 16, 8, 8), 1, 2, True], + ["PTQ; Transpose dims 1 and 2", (1, 16, 8, 8), 1, 2, False], + ["QAT; Transpose dims 2 and 3", (1, 16, 8, 8), 2, 3, True], + ["PTQ; Transpose dims 2 and 3", (1, 16, 8, 8), 2, 3, False], ] ) def test_permute_copy_non_delegated_conversion__from_transpose_4D__quantized( - self, _: str, input_shape, dim0, dim1 + self, _: str, input_shape, dim0, dim1, use_qat ): model = Conv2dTransposeModule(input_shape[1], dim0, dim1) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() nodes = list(edge_program.graph.nodes) assert len(nodes) == 8 diff --git a/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py index cf0e0135ffe..b91720324f2 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_relu_converter.py @@ -21,6 +21,7 @@ ) from executorch.backends.nxp.tests.models import Conv2dModule, LinearModule, ReLUModule from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -62,13 +63,16 @@ def test_relu_conversion(): convert_run_compare(edge_program, input_data=input_data) -def test_relu_with_conv_quant_conversion(mocker): +def test_relu_with_conv_quant_conversion(mocker, use_qat): input_shape = (1, 4, 32, 32) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion _ = to_quantized_edge_program( - ConvReLUModule(), input_shape, use_neutron_for_format_conversion=False + ConvReLUModule(), + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ) # Capture generated model @@ -90,12 +94,12 @@ def 
test_relu_with_conv_quant_conversion(mocker): ) -def test_relu_with_linear_quant_conversion(mocker): +def test_relu_with_linear_quant_conversion(mocker, use_qat): input_shape = (256, 32) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(LinearReLUModule(), input_shape) + _ = to_quantized_edge_program(LinearReLUModule(), input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, _ = converter_spy.spy_return diff --git a/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py index 382266e9cb1..ad03aa18ded 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_sigmoid_converter.py @@ -20,6 +20,7 @@ from executorch.backends.nxp.tests.models import ConvWithSigmoid from torch import nn from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -28,13 +29,13 @@ def reseed_model_per_test_run(): np.random.seed(23) -def test_conv_sigmoid(mocker, input_shape: tuple[int] = (1, 3, 112, 112)): +def test_conv_sigmoid(mocker, use_qat, input_shape: tuple[int] = (1, 3, 112, 112)): model = ConvWithSigmoid(conv_in_channels=input_shape[1]) converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -61,12 +62,12 @@ def test_conv_sigmoid(mocker, input_shape: tuple[int] = (1, 3, 112, 112)): pytest.param((10, 3, 25, 25, 25), id="4D"), ], ) -def test_sigmoid_only(mocker, input_shape): +def test_sigmoid_only(mocker, use_qat, input_shape): model = nn.Sigmoid() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - to_quantized_edge_program(model, input_shape).exported_program() + to_quantized_edge_program(model, input_shape, use_qat=use_qat).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.spy_return exported_program: ExportedProgram = converter_spy.call_args.args[1] diff --git a/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py index 336c3cc9afd..9ce3e93f39b 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_sub_tensor_converter.py @@ -22,6 +22,7 @@ ) from executorch.exir.dialects._ops import ops as exir_ops from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -39,13 +40,13 @@ def reseed_model_per_test_run(): pytest.param((1, 4, 8, 8), id="4D."), ], ) -def test_sub_tensor_quant_conversion(mocker, input_shape): +def test_sub_tensor_quant_conversion(mocker, input_shape, use_qat): model = SubTensorModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(model, [input_shape, input_shape]) + _ = to_quantized_edge_program(model, [input_shape, input_shape], use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -78,13 +79,13 @@ def 
test_sub_tensor_quant_conversion(mocker, input_shape): pytest.param((1, 4, 8, 8), id="4D."), ], ) -def test_sub_tensor_one_input_quant_conversion(mocker, input_shape): +def test_sub_tensor_one_input_quant_conversion(mocker, input_shape, use_qat): model = SubTensorOneInputModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(model, input_shape) + _ = to_quantized_edge_program(model, input_shape, use_qat=use_qat) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -109,7 +110,7 @@ def test_sub_tensor_one_input_quant_conversion(mocker, input_shape): pytest.param((1, 4, 5, 5), id="4D, product of dims is not a multiple of 8."), ], ) -def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape): +def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape, use_qat): model = SubTensorConvModule() converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") @@ -119,7 +120,10 @@ def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape): # Run conversion _ = to_quantized_edge_program( - model, [x_input_shape, y_input_shape], use_neutron_for_format_conversion=False + model, + [x_input_shape, y_input_shape], + use_qat=use_qat, + use_neutron_for_format_conversion=False, ) # Capture generated model @@ -161,13 +165,13 @@ def test_sub_tensor_w_conv_quant_conversion(mocker, x_input_shape): ], ) def test_sub_tensor_broadcasting_unsupported_quant_conversion( - x_input_shape, y_input_shape + x_input_shape, y_input_shape, use_qat ): model = SubTensorModule() # Run conversion edge_program = to_quantized_edge_program( - model, [x_input_shape, y_input_shape] + model, [x_input_shape, y_input_shape], use_qat=use_qat ).exported_program() nodes = list(edge_program.graph.nodes) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py index eb5fc6600f5..10892d28e38 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py @@ -34,18 +34,18 @@ def setUpClass(cls): @parameterized.expand( input=[ - ( - "inplace", - True, - ), - ( - "not_inplace", - False, - ), + ("QAT inplace", True, True), + ("PTQ inplace", True, False), + ("QAT not-inplace", False, True), + ("PTQ not-inplace", False, False), ] ) def test_conv_tanh( - self, _: str, inplace: bool, input_shape: tuple[int] = (1, 3, 112, 112) + self, + _: str, + inplace: bool, + use_qat: bool, + input_shape: tuple[int] = (1, 3, 112, 112), ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -62,7 +62,10 @@ def test_conv_tanh( ) quantized_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ).exported_program() tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value exported_program: ExportedProgram = converter_spy.calls[-1].args[0] diff --git a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py index 6e3da6c91cd..ce9fecb049b 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py @@ -38,6 +38,7 @@ from executorch.exir.dialects._ops import ops as exir_ops from 
torch import nn from torch.export import ExportedProgram +from executorch.backends.nxp.tests.use_qat import * # noqa F403 @pytest.fixture(autouse=True) @@ -243,11 +244,13 @@ def test__view_copy__formatless_to_formatless(mocker): pytest.param((8, 64), (1, 16, 4, 4), id="2D"), ], ) -def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape): +def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion - _ = to_quantized_edge_program(LinearReshapeModule(new_shape=new_shape), input_shape) + _ = to_quantized_edge_program( + LinearReshapeModule(new_shape=new_shape), input_shape, use_qat=use_qat + ) # Capture generated model tflite_flatbuffers_model, io_formats = converter_spy.spy_return @@ -268,7 +271,9 @@ def test_view_copy_w_linear_quant_conversion(mocker, input_shape, new_shape): pytest.param((1, 4, 16, 16), 196, id="4D"), ], ) -def test_view_w_conv_linear_quant_conversion(mocker, input_shape, channels_view_out): +def test_view_w_conv_linear_quant_conversion( + mocker, input_shape, channels_view_out, use_qat +): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") # Run conversion @@ -277,6 +282,7 @@ def test_view_w_conv_linear_quant_conversion(mocker, input_shape, channels_view_ channels=input_shape[1], channels_view_out=channels_view_out ), input_shape, + use_qat=use_qat, use_neutron_for_format_conversion=False, ) diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py index edba1af4ede..e2b41aab8de 100644 --- a/backends/nxp/tests/models.py +++ b/backends/nxp/tests/models.py @@ -631,3 +631,42 @@ def __init__(self, activation: str, inplace: bool, in_channels: int): def forward(self, x): x = self.conv(x) return self.activation(x) + + +class MiniConvNetWithRegressionHead(torch.nn.Module): + def __init__(self): + super().__init__() + + self.conv1 = Conv2dModule(in_channels=3, out_channels=16, stride=1, padding=1) + self.relu = torch.nn.ReLU() + self.pool = torch.nn.MaxPool2d(2, 2) + self.conv2 = Conv2dModule(in_channels=16, out_channels=32, stride=1, padding=1) + self.relu2 = torch.nn.ReLU() + self.pool = torch.nn.MaxPool2d(2, 2) + self.linear = torch.nn.Linear(32 * 8 * 8, 1) + + def forward(self, x): + x = self.conv1(x) + x = self.relu(x) + x = self.pool(x) + x = self.conv2(x) + x = self.relu2(x) + x = self.pool(x) + x = x.flatten() + x = self.linear(x) + return x + + +class MLP(torch.nn.Module): + def __init__(self): + super().__init__() + self.sequential = torch.nn.Sequential( + torch.nn.Linear(1, 10), + torch.nn.ReLU(), + torch.nn.Linear(10, 10), + torch.nn.ReLU(), + torch.nn.Linear(10, 1), + ) + + def forward(self, x): + return self.sequential(x) diff --git a/backends/nxp/tests/test_edge_passes.py b/backends/nxp/tests/test_edge_passes.py index bde3d22e204..d93b1ae69ff 100644 --- a/backends/nxp/tests/test_edge_passes.py +++ b/backends/nxp/tests/test_edge_passes.py @@ -29,7 +29,7 @@ from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.backends.nxp.tests.executorch_pipeline import ( get_random_calibration_inputs, neutron_target_spec, @@ -87,6 +87,8 @@ def 
_assert_nodes_form_a_view_copy_qdq_cluster(graph: Graph, node_indices: list[ class TestEdgePasses(unittest.TestCase): + __test__ = False # Prevent interfering with PyTest tests + @classmethod def setUpClass(cls): torch.manual_seed(23) @@ -305,7 +307,7 @@ def test_remove_additional_quantize_dequantize_nodes_pass(self): example_input = calibration_inputs[0] exir_program_aten = torch.export.export(model, example_input, strict=True) - exir_program_aten_quant = post_training_quantize( + exir_program_aten_quant = calibrate_and_quantize( exir_program_aten, calibration_inputs, NeutronQuantizer(neutron_target_spec), diff --git a/backends/nxp/tests/test_integration.py b/backends/nxp/tests/test_integration.py index 3bd5f3e1487..fe157b44c48 100644 --- a/backends/nxp/tests/test_integration.py +++ b/backends/nxp/tests/test_integration.py @@ -5,6 +5,7 @@ import executorch.extension.pybindings.portable_lib import executorch.kernels.quantized # noqa F401 +from executorch.backends.nxp.tests.use_qat import * # noqa F401 from executorch.backends.nxp.tests.executorch_pipeline import ( to_quantized_executorch_program, @@ -14,11 +15,11 @@ from executorch.examples.nxp.experimental.cifar_net.cifar_net import CifarNet -def test_conv_fc_softmax__to_executorch_program(): +def test_conv_fc_softmax__to_executorch_program(use_qat): model = ConvFCSoftmaxModule() input_shape = (1, 4, 5, 5) - exec_prog = to_quantized_executorch_program(model, input_shape) + exec_prog = to_quantized_executorch_program(model, input_shape, use_qat) program = exec_prog.exported_program() assert ( @@ -36,11 +37,11 @@ def test_conv_fc_softmax__to_executorch_program(): assert "addmm" not in node.name -def test_cifarnet(): +def test_cifarnet(use_qat): model = CifarNet().get_eager_model().eval() input_shape = (1, 3, 32, 32) exec_prog = to_quantized_executorch_program( - model, input_shape, use_neutron_for_format_conversion=False + model, input_shape, use_qat=use_qat, use_neutron_for_format_conversion=False ) delegation_info = get_delegation_info(exec_prog.exported_program().graph_module) diff --git a/backends/nxp/tests/test_move_activation_before_concatenation.py b/backends/nxp/tests/test_move_activation_before_concatenation.py index cede3e41994..27bd675a487 100644 --- a/backends/nxp/tests/test_move_activation_before_concatenation.py +++ b/backends/nxp/tests/test_move_activation_before_concatenation.py @@ -3,6 +3,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import itertools
 import math
 import unittest
 
@@ -19,7 +20,7 @@
     EdgeProgramToIRConverter,
 )
 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
-from executorch.backends.nxp.quantizer.utils import post_training_quantize
+from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize
 from executorch.backends.nxp.tests.executorch_pipeline import (
     get_random_calibration_inputs,
     neutron_target_spec,
@@ -50,6 +51,35 @@
 ]
 
 
+# Permutation of all supported combinations of:
+# <activation>, <inplace>, <is_qat>
+all_activation_cases = list(
+    itertools.product(
+        ["relu", "relu6", "tanh"],
+        [True, False],
+        [True, False],
+    )
+) + [
+    ("sigmoid", False, True),
+    ("sigmoid", False, False),
+]
+
+
+# <activation_1>, <activation_2>, <act1_inplace>, <act2_inplace>, <use_qat>
+all_concat_cluster_cases = [
+    ("relu", "relu", True, False, True),
+    ("relu", "relu", True, False, False),
+    ("relu6", "relu6", False, True, True),
+    ("relu6", "relu6", False, True, False),
+    ("tanh", "tanh", True, False, True),
+    ("tanh", "tanh", True, False, False),
+    ("sigmoid", "sigmoid", False, True, True),
+    ("sigmoid", "sigmoid", False, True, False),
+    ("relu", "relu_hardtanh", True, True, True),
+    ("relu", "relu_hardtanh", True, True, False),
+]
+
+
 class ConvConcatActivationModule(torch.nn.Module):
     def __init__(self, activation: str, inplace: bool, in_channels: int):
         super().__init__()
@@ -174,18 +204,8 @@ def setUpClass(cls):
         torch.manual_seed(23)
         np.random.seed(42)
 
-    @parameterized.expand(
-        [
-            ["relu", True],
-            ["relu", False],
-            ["relu6", True],
-            ["relu6", False],
-            ["tanh", True],
-            ["tanh", False],
-            ["sigmoid", False],
-        ]
-    )
-    def test_move_activation_before_concat__conv(self, activation, inplace):
+    @parameterized.expand(all_activation_cases)
+    def test_move_activation_before_concat__conv(self, activation, inplace, is_qat):
         input_shape = (1, 3, 8, 8)
         model = ConvConcatActivationModule(
             activation=activation, inplace=inplace, in_channels=3
@@ -248,10 +268,11 @@ def test_move_activation_before_concat__conv(self, activation, inplace):
         neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec)
         neutron_aten_pass_manager(exir_program_aten)  # All passes by default.
 
-        exir_program_aten_quant = post_training_quantize(
+        exir_program_aten_quant = calibrate_and_quantize(
             exir_program_aten,
             calibration_inputs,
             NeutronQuantizer(neutron_target_spec),
+            is_qat=is_qat,
         )
 
         # Check convolution and activation are in same QDQ cluster.
@@ -282,18 +303,8 @@ def test_move_activation_before_concat__conv(self, activation, inplace):
             == torch.ops.quantized_decomposed.quantize_per_tensor.default
         )
 
-    @parameterized.expand(
-        [
-            ["relu", True],
-            ["relu", False],
-            ["relu6", True],
-            ["relu6", False],
-            ["tanh", True],
-            ["tanh", False],
-            ["sigmoid", False],
-        ]
-    )
-    def test_move_activation_before_concat__linear(self, activation, inplace):
+    @parameterized.expand(all_activation_cases)
+    def test_move_activation_before_concat__linear(self, activation, inplace, is_qat):
         input_shape = (1, 8)
         model = LinearConcatActivationModule(
             activation=activation, inplace=inplace, in_channels=8, mode="linear"
@@ -356,10 +367,11 @@ def test_move_activation_before_concat__linear(self, activation, inplace):
         neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec)
         neutron_aten_pass_manager(exir_program_aten)  # All passes by default.
 
- exir_program_aten_quant = post_training_quantize( + exir_program_aten_quant = calibrate_and_quantize( exir_program_aten, calibration_inputs, NeutronQuantizer(neutron_target_spec), + is_qat=is_qat, ) # Check linear and activation are in same QDQ cluster. @@ -390,18 +402,8 @@ def test_move_activation_before_concat__linear(self, activation, inplace): == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) - def test_move_activation_before_concat__addmm(self, activation, inplace): + @parameterized.expand(all_activation_cases) + def test_move_activation_before_concat__addmm(self, activation, inplace, is_qat): input_shape = (1, 8) model = LinearConcatActivationModule( activation=activation, inplace=inplace, in_channels=8, mode="addmm" @@ -464,10 +466,11 @@ def test_move_activation_before_concat__addmm(self, activation, inplace): neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec) neutron_aten_pass_manager(exir_program_aten) # All passes by default. - exir_program_aten_quant = post_training_quantize( + exir_program_aten_quant = calibrate_and_quantize( exir_program_aten, calibration_inputs, NeutronQuantizer(neutron_target_spec), + is_qat=is_qat, ) # Check addmm and activation are in same QDQ cluster. @@ -498,18 +501,8 @@ def test_move_activation_before_concat__addmm(self, activation, inplace): == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) - def test_move_activation_before_concat__mm(self, activation, inplace): + @parameterized.expand(all_activation_cases) + def test_move_activation_before_concat__mm(self, activation, inplace, is_qat): input_shape = (1, 8) model = LinearConcatActivationModule( activation=activation, inplace=inplace, in_channels=8, mode="mm" @@ -572,10 +565,11 @@ def test_move_activation_before_concat__mm(self, activation, inplace): neutron_aten_pass_manager = NeutronAtenPassManager(neutron_target_spec) neutron_aten_pass_manager(exir_program_aten) # All passes by default. - exir_program_aten_quant = post_training_quantize( + exir_program_aten_quant = calibrate_and_quantize( exir_program_aten, calibration_inputs, NeutronQuantizer(neutron_target_spec), + is_qat=is_qat, ) # Check mm and activation are in same QDQ cluster. @@ -606,19 +600,9 @@ def test_move_activation_before_concat__mm(self, activation, inplace): == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) + @parameterized.expand(all_activation_cases) def test_move_activation_before_concat_quantization__conv( - self, activation, inplace + self, activation, inplace, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -631,7 +615,10 @@ def test_move_activation_before_concat_quantization__conv( ) edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ).exported_program() # Make sure that all nodes were delegated. 
@@ -655,19 +642,9 @@ def test_move_activation_before_concat_quantization__conv( tflite_output_preprocess=ToChannelFirstPreprocess(), ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) + @parameterized.expand(all_activation_cases) def test_move_activation_before_concat_quantization__linear( - self, activation, inplace + self, activation, inplace, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -680,7 +657,7 @@ def test_move_activation_before_concat_quantization__linear( ) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. @@ -702,19 +679,9 @@ def test_move_activation_before_concat_quantization__linear( tfl_model=tflite_flatbuffers_model, ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) + @parameterized.expand(all_activation_cases) def test_move_activation_before_concat_quantization__addmm( - self, activation, inplace + self, activation, inplace, use_qat ): torch.manual_seed(23) with kgb.spy_on( @@ -728,7 +695,7 @@ def test_move_activation_before_concat_quantization__addmm( ) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. @@ -751,18 +718,10 @@ def test_move_activation_before_concat_quantization__addmm( atol=1.0, ) - @parameterized.expand( - [ - ["relu", True], - ["relu", False], - ["relu6", True], - ["relu6", False], - ["tanh", True], - ["tanh", False], - ["sigmoid", False], - ] - ) - def test_move_activation_before_concat_quantization__mm(self, activation, inplace): + @parameterized.expand(all_activation_cases) + def test_move_activation_before_concat_quantization__mm( + self, activation, inplace, use_qat + ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -774,7 +733,7 @@ def test_move_activation_before_concat_quantization__mm(self, activation, inplac ) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. 
@@ -796,17 +755,9 @@ def test_move_activation_before_concat_quantization__mm(self, activation, inplac tfl_model=tflite_flatbuffers_model, ) - @parameterized.expand( - [ - ["relu", "relu", True, False], - ["relu6", "relu6", False, True], - ["tanh", "tanh", True, False], - ["sigmoid", "sigmoid", False, True], - ["relu", "relu_hardtanh", True, True], - ] - ) + @parameterized.expand(all_concat_cluster_cases) def test_concat_cluster_quantization__conv( - self, activation1, activation2, act1_inplace, act2_inplace + self, activation1, activation2, act1_inplace, act2_inplace, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -814,7 +765,7 @@ def test_concat_cluster_quantization__conv( owner=EdgeProgramToIRConverter, ) as converter_spy: with kgb.spy_on( - post_training_quantize, call_original=True + calibrate_and_quantize, call_original=True ) as quantizer_spy: input_shape = (1, 8, 8, 8) model = ConvActivationConcatModule( @@ -822,7 +773,10 @@ def test_concat_cluster_quantization__conv( ) edge_program = to_quantized_edge_program( - model, input_shape, use_neutron_for_format_conversion=False + model, + input_shape, + use_qat=use_qat, + use_neutron_for_format_conversion=False, ).exported_program() # Make sure that all nodes were delegated. @@ -877,17 +831,9 @@ def test_concat_cluster_quantization__conv( tflite_output_preprocess=ToChannelFirstPreprocess(), ) - @parameterized.expand( - [ - ["relu", "relu", True, False], - ["relu6", "relu6", False, True], - ["tanh", "tanh", True, False], - ["sigmoid", "sigmoid", False, True], - ["relu", "relu_hardtanh", True, True], - ] - ) + @parameterized.expand(all_concat_cluster_cases) def test_concat_cluster_quantization__linear( - self, activation1, activation2, act1_inplace, act2_inplace + self, activation1, activation2, act1_inplace, act2_inplace, use_qat ): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, @@ -895,7 +841,7 @@ def test_concat_cluster_quantization__linear( owner=EdgeProgramToIRConverter, ) as converter_spy: with kgb.spy_on( - post_training_quantize, call_original=True + calibrate_and_quantize, call_original=True ) as quantizer_spy: input_shape = (1, 8) model = LinearActivationConcatModule( @@ -903,7 +849,7 @@ def test_concat_cluster_quantization__linear( ) edge_program = to_quantized_edge_program( - model, input_shape + model, input_shape, use_qat=use_qat ).exported_program() # Make sure that all nodes were delegated. 
diff --git a/backends/nxp/tests/test_per_channel_conversion.py b/backends/nxp/tests/test_per_channel_conversion.py index 62cbef9e151..b3034ff17ed 100644 --- a/backends/nxp/tests/test_per_channel_conversion.py +++ b/backends/nxp/tests/test_per_channel_conversion.py @@ -31,11 +31,18 @@ ) from executorch.backends.nxp.tests.models import Conv2dModule from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized from torch import fx from torch._ops import OpOverload from torch.export import ExportedProgram -from torchao.quantization.pt2e import MinMaxObserver, PerChannelMinMaxObserver +from torchao.quantization.pt2e import ( + FusedMovingAvgObsFakeQuantize, + MinMaxObserver, + MovingAverageMinMaxObserver, + MovingAveragePerChannelMinMaxObserver, + PerChannelMinMaxObserver, +) from torchao.quantization.pt2e.quantizer import ( DerivedQuantizationSpec, QuantizationConfig, @@ -45,8 +52,8 @@ class Conv2dPatternPerChannel(QuantizationPattern): - def __init__(self, is_per_channel: bool): - super().__init__() + def __init__(self, is_per_channel: bool, is_qat: bool): + super().__init__(is_qat=is_qat) self.is_per_channel = is_per_channel def partition_types(self) -> list[OpOverload]: @@ -80,9 +87,20 @@ def get_anchors( if self.is_per_channel else torch.per_tensor_symmetric ) - weight_observer_or_fake_quant_ctr = ( - PerChannelMinMaxObserver if self.is_per_channel else MinMaxObserver - ) + if self.is_qat: + observer = ( + MovingAveragePerChannelMinMaxObserver + if self.is_per_channel + else MovingAverageMinMaxObserver + ) + weight_observer_or_fake_quant_ctr = FusedMovingAvgObsFakeQuantize.with_args( + observer=observer + ) + else: + weight_observer_or_fake_quant_ctr = ( + PerChannelMinMaxObserver if self.is_per_channel else MinMaxObserver + ) + weight_quantization_spec = QuantizationSpec( dtype=torch.int8, observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, @@ -108,7 +126,8 @@ def setUpClass(cls): torch.manual_seed(25) np.random.seed(25) - def test_per_channel_convolution(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_per_channel_convolution(self, _, use_qat: bool): with kgb.spy_on( EdgeProgramToIRConverter.convert_program, call_original=True, @@ -119,13 +138,18 @@ def test_per_channel_convolution(self): ) input_shape = (1, 8, 32, 32) - static_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_qspec, None) + activation_qspec = act_qspec(is_qat=use_qat) + static_qconfig = QuantizationConfig( + activation_qspec, activation_qspec, wgt_qspec, None + ) _ = to_quantized_edge_program( model, input_shape, get_quantizer_fn=lambda: NeutronAtenQuantizer( - Conv2dPatternPerChannel(is_per_channel=True), static_qconfig + Conv2dPatternPerChannel(is_per_channel=True, is_qat=use_qat), + static_qconfig, ), + use_qat=use_qat, use_neutron_for_format_conversion=False, ) diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py index 85736039d26..27422f9ce1e 100644 --- a/backends/nxp/tests/test_quantizer.py +++ b/backends/nxp/tests/test_quantizer.py @@ -5,6 +5,7 @@ # Tests for NeutronQuantizer. 
+import itertools from copy import deepcopy import executorch.backends.nxp.tests.executorch_pipeline as executorch_pipeline @@ -29,9 +30,17 @@ ToChannelLastPreprocess, ) from executorch.exir.dialects._ops import ops as exir_ops -from torch.export import ExportedProgram +from torch.export import export, ExportedProgram from torch.fx import GraphModule -from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e +from torchao.quantization.pt2e import ( + move_exported_model_to_eval, + move_exported_model_to_train, +) +from torchao.quantization.pt2e.quantize_pt2e import ( + convert_pt2e, + prepare_pt2e, + prepare_qat_pt2e, +) fuse_activation_ops = [ exir_ops.edge.aten.addmm.default, @@ -44,16 +53,45 @@ ] +# Permutation of all supported combinations of: +# , , +all_activation_cases = list( + itertools.product( + ["relu", "relu6", "tanh"], + [True, False], + [True, False], + ) +) + [ + ("sigmoid", False, True), + ("sigmoid", False, False), +] + + +@pytest.fixture(autouse=True) +def reseed_model_per_test_run(): + torch.manual_seed(23) + + +def _prepare_for_quantization(exported_model, is_qat: bool = False): + if is_qat: + return prepare_qat_pt2e( + exported_model.module(), NeutronQuantizer(neutron_target_spec, is_qat=True) + ) + else: + return prepare_pt2e( + exported_model.module(), NeutronQuantizer(neutron_target_spec) + ) + + def test_quantizer_conv2d(): model = models.Conv2dModule() model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -87,11 +125,10 @@ def test_quantizer_linear(): model.eval() example_input = (torch.ones(10, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -123,11 +160,10 @@ def test_quantizer_maxpool2d(): model.eval() example_input = (torch.ones(1, 8, 32, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -158,11 +194,10 @@ def test_quantizer_softmax(): model.eval() example_input = (torch.ones(1, 10),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -192,11 +227,10 @@ def test_quantizer_single_maxpool2d(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, 
example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -214,11 +248,10 @@ def test_quantizer_conv2d_relu(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -241,11 +274,10 @@ def test_quantizer_conv2d_avg_pool2d(): model.eval() example_input = (torch.ones(1, 4, 16, 16),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -269,11 +301,10 @@ def test_quantizer_conv2d_permute(): model.eval() example_input = (torch.ones(1, 4, 16, 16),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -301,11 +332,10 @@ def test_multiple_shared_spec_ops_in_row(): model.eval() example_input = (torch.ones(1, 3, 64, 64),) - quantizer = NeutronQuantizer(neutron_target_spec) - graph_module = torch.export.export(model, example_input, strict=True).module() + exported_model = torch.export.export(model, example_input, strict=True) # noinspection PyTypeChecker - m = prepare_pt2e(graph_module, quantizer) + m = _prepare_for_quantization(exported_model) m(*example_input) m = convert_pt2e(m) @@ -362,21 +392,10 @@ def test_quantizers_order_invariance(): assert all(n == n_reversed for n, n_reversed in zip(nodes, nodes_reversed)) -@pytest.mark.parametrize( - "activation, inplace", - [ - ("relu", True), - ("relu", False), - ("relu6", True), - ("relu6", False), - ("tanh", True), - ("tanh", False), - ("sigmoid", False), - ], -) -def test_quantizer__linear_w_activation(mocker, activation, inplace): +@pytest.mark.parametrize("activation, inplace, use_qat", all_activation_cases) +def test_quantizer__linear_w_activation(mocker, activation, inplace, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantizer_spy = mocker.spy(executorch_pipeline, "post_training_quantize") + quantizer_spy = mocker.spy(executorch_pipeline, "calibrate_and_quantize") input_shape = (1, 4) model = models.LinearActivationModule( @@ -386,7 +405,9 @@ def test_quantizer__linear_w_activation(mocker, activation, inplace): mode="linear", ) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure that all nodes were delegated. 
assert not graph_contains_any_of_ops( @@ -418,28 +439,19 @@ def test_quantizer__linear_w_activation(mocker, activation, inplace): ) -@pytest.mark.parametrize( - "activation, inplace", - [ - ("relu", True), - ("relu", False), - ("relu6", True), - ("relu6", False), - ("tanh", True), - ("tanh", False), - ("sigmoid", False), - ], -) -def test_quantizer__addmm_w_activation(mocker, activation, inplace): +@pytest.mark.parametrize("activation, inplace, use_qat", all_activation_cases) +def test_quantizer__addmm_w_activation(mocker, activation, inplace, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantizer_spy = mocker.spy(executorch_pipeline, "post_training_quantize") + quantizer_spy = mocker.spy(executorch_pipeline, "calibrate_and_quantize") input_shape = (1, 4) model = models.LinearActivationModule( activation=activation, inplace=inplace, in_channels=input_shape[1], mode="addmm" ) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure that all nodes were delegated. assert not graph_contains_any_of_ops( @@ -471,28 +483,19 @@ def test_quantizer__addmm_w_activation(mocker, activation, inplace): ) -@pytest.mark.parametrize( - "activation, inplace", - [ - ("relu", True), - ("relu", False), - ("relu6", True), - ("relu6", False), - ("tanh", True), - ("tanh", False), - ("sigmoid", False), - ], -) -def test_quantizer__mm_w_activation(mocker, activation, inplace): +@pytest.mark.parametrize("activation, inplace, use_qat", all_activation_cases) +def test_quantizer__mm_w_activation(mocker, activation, inplace, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantizer_spy = mocker.spy(executorch_pipeline, "post_training_quantize") + quantizer_spy = mocker.spy(executorch_pipeline, "calibrate_and_quantize") input_shape = (1, 4) model = models.LinearActivationModule( activation=activation, inplace=inplace, in_channels=input_shape[1], mode="mm" ) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure that all nodes were delegated. assert not graph_contains_any_of_ops( @@ -524,28 +527,19 @@ def test_quantizer__mm_w_activation(mocker, activation, inplace): ) -@pytest.mark.parametrize( - "activation, inplace", - [ - ("relu", True), - ("relu", False), - ("relu6", True), - ("relu6", False), - ("tanh", True), - ("tanh", False), - ("sigmoid", False), - ], -) -def test_quantizer__conv_w_activation(mocker, activation, inplace): +@pytest.mark.parametrize("activation, inplace, use_qat", all_activation_cases) +def test_quantizer__conv_w_activation(mocker, activation, inplace, use_qat): converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - quantizer_spy = mocker.spy(executorch_pipeline, "post_training_quantize") + quantizer_spy = mocker.spy(executorch_pipeline, "calibrate_and_quantize") input_shape = (1, 4, 8, 8) model = models.ConvActivationModule( activation=activation, inplace=inplace, in_channels=input_shape[1] ) - edge_program = to_quantized_edge_program(model, input_shape).exported_program() + edge_program = to_quantized_edge_program( + model, input_shape, use_qat=use_qat + ).exported_program() # Make sure that all nodes were delegated. 
assert not graph_contains_any_of_ops( @@ -579,3 +573,66 @@ def test_quantizer__conv_w_activation(mocker, activation, inplace): tflite_output_preprocess=ToChannelFirstPreprocess(), atol=1.0, ) + + +def test_qat_train(loss_tolerance: float = 0.02): + def evaluate(model, inputs, gts): + with torch.no_grad(): + test_outputs = model(inputs) + loss = torch.nn.functional.mse_loss(test_outputs, gts) + return loss + + def train_step(model, optimizer): + optimizer.zero_grad() + batch = torch.randn(100, 1).clamp(-1, 1) + outputs = model(batch) + loss = torch.nn.functional.mse_loss(outputs, torch.sin(batch)) + loss.backward() + optimizer.step() + + model = models.MLP() + model.train() + optimizer = torch.optim.SGD(model.parameters(), lr=0.01) + + for _ in range(100): + train_step(model, optimizer) + + test_inputs = torch.randn(20, 1).clamp(-1, 1) + + model.eval() + eval_loss = evaluate(model, test_inputs, torch.sin(test_inputs)) + + exported_model = export(model, (torch.randn(1, 1),), strict=True) + prepared_model = _prepare_for_quantization(exported_model, is_qat=True) + + prepared_model = move_exported_model_to_train(prepared_model) + for _ in range(30): + train_step(prepared_model, optimizer) + prepared_model = move_exported_model_to_eval(prepared_model) + + quantized_model = convert_pt2e(prepared_model) + + test_inputs = torch.randn(100, 1).clamp(-1, 1) + + quant_eval_loss = evaluate(quantized_model, test_inputs, torch.sin(test_inputs)) + + assert (quant_eval_loss - eval_loss) < loss_tolerance + + +def test_qat_produces_same_graph_as_ptq(): + model = models.MiniConvNetWithRegressionHead() + model.eval() + exported_model = export(model, ((torch.randn(1, 3, 32, 32),)), strict=True) + + qat_prepared_model = _prepare_for_quantization(exported_model, is_qat=True) + qat_quantized_model = convert_pt2e(qat_prepared_model) + + ptq_prepared_model = _prepare_for_quantization(exported_model, is_qat=False) + ptq_quantized_model = convert_pt2e(ptq_prepared_model) + + assert all( + ptqn.target == qatn.target + for qatn, ptqn in zip( + qat_quantized_model.graph.nodes, ptq_quantized_model.graph.nodes + ) + ) diff --git a/backends/nxp/tests/test_removing_dead_code.py b/backends/nxp/tests/test_removing_dead_code.py index 18d2f1d698e..8b3a979f412 100644 --- a/backends/nxp/tests/test_removing_dead_code.py +++ b/backends/nxp/tests/test_removing_dead_code.py @@ -10,9 +10,10 @@ import torch from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.backends.nxp.tests.executorch_pipeline import neutron_target_spec from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops +from parameterized import parameterized @pytest.fixture(autouse=True) @@ -39,7 +40,8 @@ def setUpClass(cls): torch.manual_seed(23) np.random.seed(23) - def test_removing_dead_code(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_removing_dead_code(self, _, is_qat: bool): input_shape = (42,) example_inputs = (torch.ones(input_shape),) model = DeadCodeModule() @@ -53,8 +55,8 @@ def test_removing_dead_code(self): # The `NeutronQuantizer` should remove the dead code in the `transform_for_annotation()` method. 
quantizer = NeutronQuantizer(neutron_target_spec) - exir_program_aten_quant = post_training_quantize( - exir_program_aten, [example_inputs], quantizer + exir_program_aten_quant = calibrate_and_quantize( + exir_program_aten, [example_inputs], quantizer, is_qat=is_qat ) # Make sure the is no `add` operation in the graph anymore. diff --git a/backends/nxp/tests/test_split_group_convolution.py b/backends/nxp/tests/test_split_group_convolution.py index f5dfcff1fde..e8d807963ee 100644 --- a/backends/nxp/tests/test_split_group_convolution.py +++ b/backends/nxp/tests/test_split_group_convolution.py @@ -18,7 +18,7 @@ from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.backends.nxp.tests.executorch_pipeline import ( get_random_calibration_inputs, neutron_target_spec, @@ -38,14 +38,15 @@ def _quantize_and_lower_module( - module: GraphModule, input_shape: tuple[int, ...], target="imxrt700" + module: GraphModule, input_shape: tuple[int, ...], is_qat: bool, target="imxrt700" ) -> EdgeProgramManager: calibration_inputs = get_random_calibration_inputs(to_model_input_spec(input_shape)) - exir_program_aten__module_quant = post_training_quantize( + exir_program_aten__module_quant = calibrate_and_quantize( module, calibration_inputs, NeutronQuantizer(neutron_target_spec), + is_qat=is_qat, ) edge_compile_config = EdgeCompileConfig(_check_ir_validity=False) @@ -70,12 +71,17 @@ def setUp(cls): @parameterized.expand( [ - ["group = 2", [1, 16, 10, 10], 2], - ["group = 3", [1, 24, 10, 10], 3], - ["group = 8", [1, 8, 10, 10], 8], + ["QAT; group = 2", [1, 16, 10, 10], 2, True], + ["PTQ; group = 2", [1, 16, 10, 10], 2, False], + ["QAT; group = 3", [1, 24, 10, 10], 3, True], + ["PTQ; group = 3", [1, 24, 10, 10], 3, False], + ["QAT; group = 8", [1, 8, 10, 10], 8, True], + ["PTQ; group = 8", [1, 8, 10, 10], 8, False], ] ) - def test_split_group_convolution__2d(self, _, input_shape: list[int], group: int): + def test_split_group_convolution__2d( + self, _, input_shape: list[int], group: int, is_qat: bool + ): example_input = (torch.ones(input_shape),) module = Conv2dModule( @@ -116,7 +122,7 @@ def test_split_group_convolution__2d(self, _, input_shape: list[int], group: int # Make sure the graph can be correctly quantized and lowered to edge. 
ep = _quantize_and_lower_module( - modified_module, tuple(input_shape) + modified_module, tuple(input_shape), is_qat=is_qat ).exported_program() nodes = list(ep.graph.nodes) assert nodes[-5].name == "lowered_module_0" @@ -127,12 +133,17 @@ def test_split_group_convolution__2d(self, _, input_shape: list[int], group: int @parameterized.expand( [ - ["group = 2", [1, 16, 10], 2], - ["group = 3", [1, 24, 10], 3], - ["group = 6", [1, 24, 10], 6], + ["QAT; group = 2", [1, 16, 10], 2, True], + ["PTQ; group = 2", [1, 16, 10], 2, False], + ["QAT; group = 3", [1, 24, 10], 3, True], + ["PTQ; group = 3", [1, 24, 10], 3, False], + ["QAT; group = 6", [1, 24, 10], 6, True], + ["PTQ; group = 6", [1, 24, 10], 6, False], ] ) - def test_split_group_convolution__1d(self, _, input_shape: list[int], group: int): + def test_split_group_convolution__1d( + self, _, input_shape: list[int], group: int, is_qat: bool + ): example_input = (torch.ones(input_shape),) module = Conv1dModule( @@ -173,7 +184,7 @@ def test_split_group_convolution__1d(self, _, input_shape: list[int], group: int # Make sure the graph can be correctly quantized and lowered to edge. ep = _quantize_and_lower_module( - modified_module, tuple(input_shape) + modified_module, tuple(input_shape), is_qat=is_qat ).exported_program() nodes = list(ep.graph.nodes) assert nodes[-5].name == "lowered_module_0" @@ -219,7 +230,8 @@ def test_split_group_convolution__3d(self, _, input_shape: list[int], group: int out2 = modified_module(input_data).detach().numpy() assert np.allclose(out1, out2) - def test_split_group_convolution__applied_by_default(self): + @parameterized.expand([("QAT", True), ("PTQ", False)]) + def test_split_group_convolution__applied_by_default(self, _, is_qat: bool): input_shape = [1, 16, 10, 10] group = 2 example_input = (torch.ones(input_shape),) @@ -261,7 +273,7 @@ def test_split_group_convolution__applied_by_default(self): # Make sure the graph can be correctly quantized and lowered to edge. ep = _quantize_and_lower_module( - modified_module, tuple(input_shape) + modified_module, tuple(input_shape), is_qat=is_qat ).exported_program() nodes = list(ep.graph.nodes) assert nodes[-5].name == "lowered_module_0" diff --git a/backends/nxp/tests/use_qat.py b/backends/nxp/tests/use_qat.py new file mode 100644 index 00000000000..5994d5aa193 --- /dev/null +++ b/backends/nxp/tests/use_qat.py @@ -0,0 +1,11 @@ +import pytest + + +@pytest.fixture +def use_qat(request): + return request.param + + +def pytest_generate_tests(metafunc): + if "use_qat" in metafunc.fixturenames: + metafunc.parametrize("use_qat", [True, False], indirect=True) diff --git a/docs/source/backends-nxp.md b/docs/source/backends-nxp.md index 20dd180fb31..4f7e2e9c763 100644 --- a/docs/source/backends-nxp.md +++ b/docs/source/backends-nxp.md @@ -81,12 +81,12 @@ Or you can use the predefined function for post training quantization from NXP b ```python from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize ... 
target_spec = NeutronTargetSpec(target="imxrt700", converter_flavor="SDK_25_09") -quantized_graph_module = post_training_quantize( +quantized_graph_module = calibrate_and_quantize( aten_model, calibration_inputs, NeutronQuantizer(neutron_target_spec=target_spec), diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py index d2f539f0de8..175dc9d8d70 100644 --- a/examples/nxp/aot_neutron_compile.py +++ b/examples/nxp/aot_neutron_compile.py @@ -27,7 +27,7 @@ from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer -from executorch.backends.nxp.quantizer.utils import post_training_quantize +from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.devtools.visualization.visualization_utils import ( visualize_with_clusters, ) @@ -219,7 +219,7 @@ def get_model_and_inputs_from_name(model_name: str): ) calibration_inputs = example_inputs quantizer = NeutronQuantizer(neutron_target_spec) - module = post_training_quantize(module, calibration_inputs, quantizer) + module = calibrate_and_quantize(module, calibration_inputs, quantizer) if args.so_library is not None: logging.debug(f"Loading libraries: {args.so_library}")
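The example driver above still calls `calibrate_and_quantize` in its PTQ form; the QAT flow goes through the same entry point. Below is a minimal sketch, not part of the diff above, assuming the helpers this patch adds or exercises in its tests (`MLP` from `backends/nxp/tests/models.py`, `neutron_target_spec` from `backends/nxp/tests/executorch_pipeline.py`):

```python
import torch

from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize
from executorch.backends.nxp.tests.executorch_pipeline import neutron_target_spec
from executorch.backends.nxp.tests.models import MLP

model = MLP().eval()
example_inputs = (torch.randn(1, 1),)

# Export the eager model to an aten-level exported program.
exir_program_aten = torch.export.export(model, example_inputs, strict=True)

# Same call pattern the QAT-parametrized tests in this patch use: the exported
# program, a list of calibration inputs, and the quantizer; is_qat=True selects
# the QAT fake-quantize observers instead of the PTQ min-max/histogram ones.
exir_program_aten_quant = calibrate_and_quantize(
    exir_program_aten,
    [example_inputs],
    NeutronQuantizer(neutron_target_spec, is_qat=True),
    is_qat=True,
)
```

For a full QAT loop that fine-tunes between preparation and conversion (via `prepare_qat_pt2e`, `move_exported_model_to_train`/`move_exported_model_to_eval`, and `convert_pt2e`), see `test_qat_train` in `backends/nxp/tests/test_quantizer.py` above.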