From e2b257445f737c829f3a7546d52644b279e02352 Mon Sep 17 00:00:00 2001
From: Jerry Zhang
Date: Fri, 8 Sep 2023 15:50:20 -0700
Subject: [PATCH] [quant][be] Remove test/quantization/pt2e/test_quantize_pt2e_fx.py

Summary:
This is no longer needed since we have the quantizer API now.

Test Plan:
.

Reviewers:

Subscribers:

Tasks:

Tags:

[ghstack-poisoned]
---
 .../pt2e/test_quantize_pt2e_fx.py | 301 ------------------
 test/test_quantization.py         |   2 -
 2 files changed, 303 deletions(-)
 delete mode 100644 test/quantization/pt2e/test_quantize_pt2e_fx.py

diff --git a/test/quantization/pt2e/test_quantize_pt2e_fx.py b/test/quantization/pt2e/test_quantize_pt2e_fx.py
deleted file mode 100644
index d6e3edf01ddab..0000000000000
--- a/test/quantization/pt2e/test_quantize_pt2e_fx.py
+++ /dev/null
@@ -1,301 +0,0 @@
-# Owner(s): ["oncall: quantization"]
-import copy
-
-import torch
-import torch._dynamo as torchdynamo
-import torch.nn as nn
-from torch.ao.ns.fx.utils import compute_sqnr
-from torch.ao.quantization import (
-    get_default_qconfig,
-    QConfigMapping,
-    default_per_channel_symmetric_qnnpack_qconfig,
-)
-from torch.ao.quantization.quantize_pt2e import (
-    convert_pt2e,
-    _prepare_pt2e_deprecated,
-)
-from torch.ao.quantization.backend_config import get_qnnpack_backend_config
-from torch.ao.quantization.backend_config._qnnpack_pt2e import (
-    get_qnnpack_pt2e_backend_config,
-)
-from torch.ao.quantization.quantize_fx import (
-    convert_to_reference_fx,
-    _convert_to_reference_decomposed_fx,
-    prepare_fx,
-)
-
-from torch.testing._internal.common_utils import (
-    IS_WINDOWS,
-)
-from torch.testing._internal.common_quantization import (
-    NodeSpec as ns,
-    QuantizationTestCase,
-    skip_if_no_torchvision,
-    skipIfNoQNNPACK,
-)
-from torch.testing._internal.common_quantized import override_quantized_engine
-import unittest
-
-
-# TODO: remove after quantizer API is more mature
-@unittest.skip("TODO: delete")
-@skipIfNoQNNPACK
-class TestQuantizePT2EFX(QuantizationTestCase):
-    def test_qconfig_none(self):
-        class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-                self.conv1 = nn.Conv2d(1, 1, 1)
-                self.conv2 = nn.Conv2d(1, 1, 1)
-
-            def forward(self, x):
-                x = self.conv1(x)
-                x = self.conv2(x)
-                return x
-
-        with override_quantized_engine("qnnpack"):
-            m = M().eval()
-            example_inputs = (torch.randn(1, 1, 1, 1),)
-            # program capture
-            m, guards = torchdynamo.export(
-                m,
-                *copy.deepcopy(example_inputs),
-                aten_graph=True,
-                tracing_mode="real",
-            )
-
-            qconfig = get_default_qconfig("qnnpack")
-            qconfig_mapping = (
-                QConfigMapping().set_global(qconfig).set_module_name("conv2", None)
-            )
-            backend_config = get_qnnpack_pt2e_backend_config()
-            m = _prepare_pt2e_deprecated(m, qconfig_mapping, example_inputs, backend_config)
-            m(*example_inputs)
-            m = convert_pt2e(m)
-            m(*example_inputs)
-
-            # first conv is quantized, second conv is not quantized
-            node_occurrence = {
-                # two for input of the first conv, one for output for the first conv
-                ns.call_function(torch.ops.quantized_decomposed.quantize_per_tensor.default): 3,
-                ns.call_function(
-                    torch.ops.quantized_decomposed.dequantize_per_tensor.default
-                ): 3,
-            }
-            node_list = [
-                ns.call_function(torch.ops.quantized_decomposed.dequantize_per_tensor.default),
-                ns.call_function(torch.ops.quantized_decomposed.dequantize_per_tensor.default),
-                ns.call_function(torch.ops.aten.convolution.default),
-                ns.call_function(torch.ops.quantized_decomposed.dequantize_per_tensor.default),
-                ns.call_function(torch.ops.aten.convolution.default),
-            ]
-            self.checkGraphModuleNodes(
-                m,
-                expected_node_list=node_list,
-                expected_node_occurrence=node_occurrence,
-            )
-
-    def test_qconfig_module_type(self):
-        class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-                self.conv = nn.Conv2d(1, 1, 1)
-                self.linear = nn.Linear(9, 3)
-
-            def forward(self, x):
-                x = self.conv(x)
-                x = x.reshape((1, -1))
-                x = self.linear(x)
-                return x
-
-        with override_quantized_engine("qnnpack"):
-            m = M().eval()
-            example_inputs = (torch.randn(1, 1, 3, 3),)
-
-            # program capture
-            m, guards = torchdynamo.export(
-                m,
-                *copy.deepcopy(example_inputs),
-                aten_graph=True,
-                tracing_mode="real",
-            )
-
-            qconfig = get_default_qconfig("qnnpack")
-            qconfig_mapping = QConfigMapping().set_object_type(torch.nn.Conv2d, qconfig)
-            backend_config = get_qnnpack_pt2e_backend_config()
-            m = _prepare_pt2e_deprecated(m, qconfig_mapping, example_inputs, backend_config)
-            m(*example_inputs)
-            m = convert_pt2e(m)
-            m(*example_inputs)
-            # conv is quantized, linear is not quantized
-            node_occurrence = {
-                # two for input and weight of the conv, one for output for the conv
-                ns.call_function(torch.ops.quantized_decomposed.quantize_per_tensor.default): 3,
-                ns.call_function(
-                    torch.ops.quantized_decomposed.dequantize_per_tensor.default
-                ): 3,
-            }
-            node_list = [
-                ns.call_function(torch.ops.quantized_decomposed.dequantize_per_tensor.default),
-                ns.call_function(torch.ops.quantized_decomposed.dequantize_per_tensor.default),
-                ns.call_function(torch.ops.aten.convolution.default),
-                ns.call_function(torch.ops.quantized_decomposed.dequantize_per_tensor.default),
-                ns.call_function(torch.ops.aten.addmm.default),
-            ]
-            self.checkGraphModuleNodes(m, expected_node_list=node_list)
-
-    def test_transposed_conv_bn_fusion(self):
-        class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-                self.conv_trans = torch.nn.ConvTranspose2d(10, 20, 3)
-                # channels for batchnorm is the same as the out_channels for convtranspose
-                self.bn = torch.nn.BatchNorm2d(20)
-
-            def forward(self, x):
-                return self.bn(self.conv_trans(x))
-
-        with override_quantized_engine("qnnpack"):
-            m = M().eval()
-            example_inputs = (torch.randn(10, 10, 10, 10),)
-            # program capture
-            m, guards = torchdynamo.export(
-                m,
-                *copy.deepcopy(example_inputs),
-                aten_graph=True,
-                tracing_mode="real",
-            )
-
-            node_occurrence = {
-                ns.call_function(torch.ops.aten.convolution.default): 1,
-                ns.call_function(
-                    torch.ops.aten._native_batch_norm_legit_no_training.default
-                ): 1,
-            }
-            self.checkGraphModuleNodes(m, expected_node_occurrence=node_occurrence)
-
-            qconfig = get_default_qconfig("qnnpack")
-            qconfig_mapping = QConfigMapping().set_global(qconfig)
-            backend_config = get_qnnpack_pt2e_backend_config()
-            m = _prepare_pt2e_deprecated(m, qconfig_mapping, example_inputs, backend_config)
-            # make sure it runs
-            m(*example_inputs)
-
-            # make sure bn is fused into conv
-            node_occurrence = {
-                ns.call_function(torch.ops.aten.convolution.default): 1,
-                ns.call_function(
-                    torch.ops.aten._native_batch_norm_legit_no_training.default
-                ): 0,
-            }
-            self.checkGraphModuleNodes(m, expected_node_occurrence=node_occurrence)
-
-    # TODO(jerryzh168): move all _convert_to_reference_decomposed_fx tests here
-    @unittest.skipIf(IS_WINDOWS, "torch.compile is not supported on Windows")
-    def test__convert_to_reference_decomposed_fx_per_channel_quant_module(self):
-        """ Test the result for per channel weight quant for reference modules
-        """
-        class M(torch.nn.Module):
-            def __init__(self):
-                super().__init__()
-                self.conv = torch.nn.Conv2d(3, 3, 3)
-
-            def forward(self, x):
-                return self.conv(x)
-
-        m = M().eval()
-        qconfig_mapping = QConfigMapping().set_global(default_per_channel_symmetric_qnnpack_qconfig)
-        example_inputs = (torch.randn(1, 3, 10, 10),)
-        m = prepare_fx(m, qconfig_mapping, example_inputs, backend_config=get_qnnpack_backend_config())
-        m(*example_inputs)
-        m_ref = copy.deepcopy(m)
-        m_ref = convert_to_reference_fx(m_ref, backend_config=get_qnnpack_backend_config())
-        m = _convert_to_reference_decomposed_fx(m, backend_config=get_qnnpack_backend_config())
-        expected_occurrence = {
-            # for input and output activations
-            ns.call_function(torch.ops.quantized_decomposed.quantize_per_tensor.default): 2,
-            ns.call_function(torch.ops.quantized_decomposed.dequantize_per_tensor.default): 2,
-            # weight is per channel quantized
-            ns.call_function(torch.ops.quantized_decomposed.quantize_per_channel.default): 1,
-            ns.call_function(torch.ops.quantized_decomposed.dequantize_per_channel.default): 1,
-        }
-        import torch._dynamo as torchdynamo
-        m, guards = torchdynamo.export(
-            m,
-            *copy.deepcopy(example_inputs),
-            aten_graph=True,
-            tracing_mode="real",
-        )
-        self.checkGraphModuleNodes(
-            m,
-            expected_node_occurrence=expected_occurrence)
-        # make sure it runs
-        res_ref = m_ref(*example_inputs)
-        res = m(*example_inputs)
-        self.assertEqual(res, res_ref)
-        # check the qmin/qmax for per channel quant
-        for n in m.graph.nodes:
-            if n.op == "call_function" and \
-                    n.target == torch.ops.quantized_decomposed.quantize_per_channel.default:
-                _QUANT_MIN_INDEX = 4
-                _QUANT_MAX_INDEX = 5
-                self.assertEqual(n.args[_QUANT_MIN_INDEX], -127)
-                self.assertEqual(n.args[_QUANT_MAX_INDEX], 127)
-
-@unittest.skip("TODO: delete")
-class TestQuantizePT2EFXModels(QuantizationTestCase):
-    @skip_if_no_torchvision
-    @skipIfNoQNNPACK
-    def test_resnet18(self):
-        import torchvision
-
-        with override_quantized_engine("qnnpack"):
-            example_inputs = (torch.randn(1, 3, 224, 224),)
-            m = torchvision.models.resnet18().eval()
-            m_copy = copy.deepcopy(m)
-            # program capture
-            m, guards = torchdynamo.export(
-                m,
-                *copy.deepcopy(example_inputs),
-                aten_graph=True,
-                tracing_mode="real",
-            )
-
-            backend_config = get_qnnpack_pt2e_backend_config()
-            # TODO: define qconfig_mapping specifically for executorch
-            qconfig = get_default_qconfig("qnnpack")
-            qconfig_mapping = QConfigMapping().set_global(qconfig)
-            before_fusion_result = m(*example_inputs)
-
-            m = _prepare_pt2e_deprecated(m, qconfig_mapping, example_inputs, backend_config)
-
-            # checking that we inserted observers correctly for maxpool operator (input and
-            # output share observer instance)
-            self.assertEqual(
-                id(m.activation_post_process_3), id(m.activation_post_process_2)
-            )
-            after_prepare_result = m(*example_inputs)
-            m = convert_pt2e(m)
-
-            after_quant_result = m(*example_inputs)
-
-            # comparing with existing fx graph mode quantization reference flow
-            backend_config = get_qnnpack_backend_config()
-            m_fx = prepare_fx(
-                m_copy, qconfig_mapping, example_inputs, backend_config=backend_config
-            )
-            after_prepare_result_fx = m_fx(*example_inputs)
-            m_fx = _convert_to_reference_decomposed_fx(m_fx, backend_config=backend_config)
-
-            after_quant_result_fx = m_fx(*example_inputs)
-
-            # the result matches exactly after prepare
-            self.assertEqual(after_prepare_result, after_prepare_result_fx)
-            self.assertEqual(
-                compute_sqnr(after_prepare_result, after_prepare_result_fx),
-                torch.tensor(float("inf")),
-            )
-            # there are slight differences after convert due to different implementations
-            # of quant/dequant
-            self.assertTrue(torch.max(after_quant_result - after_quant_result_fx) < 1e-1)
-            self.assertTrue(compute_sqnr(after_quant_result, after_quant_result_fx) > 35)
diff --git a/test/test_quantization.py b/test/test_quantization.py
index 46cdc084ecdd7..b085c8dee3676 100644
--- a/test/test_quantization.py
+++ b/test/test_quantization.py
@@ -88,8 +88,6 @@
     from quantization.pt2e.test_quantize_pt2e import TestQuantizePT2ERepresentation  # noqa: F401
     from quantization.pt2e.test_quantize_pt2e import TestQuantizePT2EModels  # noqa: F401
     from quantization.pt2e.test_x86inductor_quantizer import TestQuantizePT2EX86Inductor  # noqa: F401
-    from quantization.pt2e.test_quantize_pt2e_fx import TestQuantizePT2EFX  # noqa: F401
-    from quantization.pt2e.test_quantize_pt2e_fx import TestQuantizePT2EFXModels  # noqa: F401
 except ImportError as e:
     # In FBCode we separate PT2 out into a separate target for the sake of dev
     # velocity. These are covered by a separate test target `quantization_pt2e`
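
For context, the quantizer API mentioned in the summary replaces the QConfigMapping + backend_config pair used throughout the deleted tests with a single quantizer object. Below is a minimal sketch of the equivalent flow, assuming the XNNPACKQuantizer and get_symmetric_quantization_config entry points under torch.ao.quantization.quantizer.xnnpack_quantizer (not part of this patch) and the same torchdynamo.export capture step the deleted tests used:

import copy

import torch
import torch._dynamo as torchdynamo
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
# assumed quantizer entry points; names taken from the PT2E tutorials of this era
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

m = torch.nn.Sequential(torch.nn.Conv2d(1, 1, 1)).eval()
example_inputs = (torch.randn(1, 1, 3, 3),)

# program capture, same as in the deleted tests
m, guards = torchdynamo.export(
    m,
    *copy.deepcopy(example_inputs),
    aten_graph=True,
    tracing_mode="real",
)

# a single quantizer object stands in for QConfigMapping + backend_config
quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
m = prepare_pt2e(m, quantizer)  # no example_inputs/backend_config arguments
m(*example_inputs)  # calibration run
m = convert_pt2e(m)
m(*example_inputs)

Since the quantizer carries both the "what to quantize" decisions (formerly QConfigMapping) and the backend-specific constraints (formerly backend_config), tests written against _prepare_pt2e_deprecated no longer exercise a supported path, which is why this file can be deleted outright.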