test/quantization/fx/test_quantize_pt2e.py (2 changes: 0 additions & 2 deletions)
@@ -265,10 +265,8 @@ def forward(self, x):
         )

         m = prepare_pt2e_quantizer(m, quantizer)
-        print("after prepare:", m)
         m(*example_inputs)
         m = convert_pt2e(m)
-        print("m:", m)
         node_occurrence = {
             # input and output are using quantize_per_tensor and weight is using quantize_per_channel
             ns.call_function(torch.ops.quantized_decomposed.quantize_per_tensor): 5,
torch/ao/quantization/_pt2e/quantizer/qnnpack_quantizer.py (8 changes: 0 additions & 8 deletions)
@@ -258,14 +258,6 @@ def set_spec_for_operator_type(
     def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
         """ just handling global spec for now
         """
-        # initialize default target_dtype_info
-        _DEFAULT_TARGET_DTYPE_INFO = {
-            "input_act_obs_or_fq_ctr": _get_default_obs_or_fq_ctr(),
-            "output_act_obs_or_fq_ctr": _get_default_obs_or_fq_ctr(),
-        }
-        for node in model.graph.nodes:
-            node.meta["target_dtype_info"] = copy.deepcopy(_DEFAULT_TARGET_DTYPE_INFO)
-
         global_spec = self.operator_spec_config.global_spec
         ops = self.get_supported_operator_for_operator_spec(global_spec)
         # annotate the nodes from last to first since the matching is in the reversed order
torch/ao/quantization/fx/prepare.py (9 changes: 7 additions & 2 deletions)
@@ -115,6 +115,8 @@
 # list of dtypes to not add observers to
 _DO_NOT_OBS_DTYPE_LIST = [int, float, torch.bool, None]

+_DEFAULT_FP32_OBS_OR_FQ_CTR = PlaceholderObserver.with_args(dtype=torch.float)
Contributor: Does this also work as FQ?

Contributor Author: Yeah, this works for FQ as well. It is not inserted into the graph; only the dtype is used here.
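As a follow-up note (not part of the diff): a minimal sketch of that point, assuming the standard PlaceholderObserver API. The _peek_dtype helper is hypothetical; it only illustrates that the factory is instantiated to read its configured dtype and is never attached to the graph, which is why an observer or a FakeQuantize factory behaves the same here.

import torch
from torch.ao.quantization.observer import PlaceholderObserver

# Same default as the one added to prepare.py in this diff.
_DEFAULT_FP32_OBS_OR_FQ_CTR = PlaceholderObserver.with_args(dtype=torch.float)

def _peek_dtype(obs_or_fq_ctr):
    # Hypothetical helper: build a throwaway instance only to read the
    # configured dtype; nothing is inserted into the model graph.
    return obs_or_fq_ctr().dtype

assert _peek_dtype(_DEFAULT_FP32_OBS_OR_FQ_CTR) == torch.float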


 # note: the following default target dtype info dicts are temporary,
 # should be moved to the new programmable API class soon
 _DEFAULT_FP32_QCONFIG_FOR_TARGET_DTYPE_INFO = {

@@ -497,8 +499,11 @@ def _get_arg_target_dtype_as_output(
         assert isinstance(observed_arg, Node), "Currently we only support observing Node"
         output_act_obs_or_fq_ctr = observed_arg.meta["target_dtype_info"]["output_act_obs_or_fq_ctr"]
     else:
-        output_act_obs_or_fq_ctr = \
-            arg.meta["target_dtype_info"]["output_act_obs_or_fq_ctr"]
+        if "target_dtype_info" in arg.meta:
+            output_act_obs_or_fq_ctr = \
+                arg.meta["target_dtype_info"].get("output_act_obs_or_fq_ctr", _DEFAULT_FP32_OBS_OR_FQ_CTR)
+        else:
+            output_act_obs_or_fq_ctr = _DEFAULT_FP32_OBS_OR_FQ_CTR
     output_act_dtype, _ = _get_dtype_and_is_dynamic(output_act_obs_or_fq_ctr)
     # TODO: should support is_dynamic here as well
     return output_act_dtype
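For readers skimming the new else-branch in _get_arg_target_dtype_as_output, here is a hedged, self-contained sketch of the fallback behavior. The _ToyNode class and _output_obs_or_fq_ctr helper are hypothetical stand-ins (the real code reads the meta dict of torch.fx.Node); they only mirror the branch added above.

import torch
from torch.ao.quantization.observer import PlaceholderObserver

_DEFAULT_FP32_OBS_OR_FQ_CTR = PlaceholderObserver.with_args(dtype=torch.float)

class _ToyNode:
    # Hypothetical stand-in for torch.fx.Node; only the .meta dict matters here.
    def __init__(self, meta):
        self.meta = meta

def _output_obs_or_fq_ctr(arg):
    # Mirrors the new else-branch: a node with no annotation falls back to
    # the fp32 placeholder instead of raising a KeyError.
    if "target_dtype_info" in arg.meta:
        return arg.meta["target_dtype_info"].get(
            "output_act_obs_or_fq_ctr", _DEFAULT_FP32_OBS_OR_FQ_CTR)
    return _DEFAULT_FP32_OBS_OR_FQ_CTR

# An unannotated node resolves to fp32 ...
assert _output_obs_or_fq_ctr(_ToyNode(meta={}))().dtype == torch.float
# ... and an explicitly annotated node is still respected.
annotated = _ToyNode(meta={"target_dtype_info": {
    "output_act_obs_or_fq_ctr": PlaceholderObserver.with_args(dtype=torch.quint8)}})
assert _output_obs_or_fq_ctr(annotated)().dtype == torch.quint8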