From 9eaa4764e751e0efe52eade1e026f8d137835fe0 Mon Sep 17 00:00:00 2001
From: V-E-D
Date: Sun, 25 May 2025 10:59:03 +0530
Subject: [PATCH 1/3] test_affine_quantized_float.py pytest to unittest

---
 test/dtypes/test_affine_quantized_float.py | 108 ++++++++++-----------
 1 file changed, 52 insertions(+), 56 deletions(-)

diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py
index 8c36f5ac7a..7fa14be88c 100644
--- a/test/dtypes/test_affine_quantized_float.py
+++ b/test/dtypes/test_affine_quantized_float.py
@@ -3,24 +3,22 @@
 #
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
-import pytest
+import unittest

 from torchao.utils import (
     TORCH_VERSION_AT_LEAST_2_5,
 )

 if not TORCH_VERSION_AT_LEAST_2_5:
-    pytest.skip("Unsupported PyTorch version", allow_module_level=True)
+    raise unittest.SkipTest("Unsupported PyTorch version")

 import copy
 import io
 import random
-import unittest
 from contextlib import nullcontext
 from functools import partial
 from typing import Tuple

-import pytest
 import torch
 from torch._inductor.test_case import TestCase as InductorTestCase
 from torch.testing._internal import common_utils
@@ -95,68 +93,66 @@ def test_fp8_linear_variants(
             "Static quantization only supports PerTensor granularity"
         )

-        error_context = (
-            pytest.raises(AssertionError, match=error_message)
-            if error_message
-            else nullcontext()
+        if error_message:
+            with self.assertRaisesRegex(AssertionError, error_message):
+                self._run_fp8_linear_variant(dtype, mode, compile, sizes, granularity)
+        else:
+            self._run_fp8_linear_variant(dtype, mode, compile, sizes, granularity)
+
+    def _run_fp8_linear_variant(self, dtype, mode, compile, sizes, granularity):
+        M, N, K = sizes
+        input_tensor = torch.randn(*M, K, dtype=dtype, device="cuda")
+        # Get a "reasonable" scale for the input tensor even though
+        # we use the same scale for multiple activations
+        scale, _ = choose_qparams_affine(
+            input_tensor,
+            MappingType.SYMMETRIC,
+            input_tensor.shape,
+            torch.float8_e4m3fn,
+            scale_dtype=torch.float32,
         )
+        mode_map = {
+            "dynamic": partial(
+                float8_dynamic_activation_float8_weight, granularity=granularity
+            ),
+            "weight-only": float8_weight_only,
+            "static": partial(
+                float8_static_activation_float8_weight,
+                scale=scale,
+                granularity=granularity,
+            ),
+        }

-        with error_context:
-            M, N, K = sizes
-            input_tensor = torch.randn(*M, K, dtype=dtype, device="cuda")
-            # Get a "reasonable" scale for the input tensor even though
-            # we use the same scale for multiple activations
-            scale, _ = choose_qparams_affine(
-                input_tensor,
-                MappingType.SYMMETRIC,
-                input_tensor.shape,
-                torch.float8_e4m3fn,
-                scale_dtype=torch.float32,
-            )
-            mode_map = {
-                "dynamic": partial(
-                    float8_dynamic_activation_float8_weight, granularity=granularity
-                ),
-                "weight-only": float8_weight_only,
-                "static": partial(
-                    float8_static_activation_float8_weight,
-                    scale=scale,
-                    granularity=granularity,
-                ),
-            }
-
-            # Create a linear layer with bfloat16 dtype
-            model = ToyLinearModel(K, N).eval().to(dtype).to("cuda")
-
-            quantized_model = copy.deepcopy(model)
-            factory = mode_map[mode]()
-            quantize_(quantized_model, factory)
-
-            if compile:
-                quantized_model = torch.compile(quantized_model, fullgraph=True)
-
-            output_original = model(input_tensor)
-            output_quantized = quantized_model(input_tensor)
-
-            error = compute_error(output_original, output_quantized)
-            assert compute_error(output_original, output_quantized) > 20, (
-                f"Quantization error is too high got a SQNR of {error}"
error is too high got a SQNR of {error}" - ) + # Create a linear layer with bfloat16 dtype + model = ToyLinearModel(K, N).eval().to(dtype).to("cuda") + + quantized_model = copy.deepcopy(model) + factory = mode_map[mode]() + quantize_(quantized_model, factory) + + if compile: + quantized_model = torch.compile(quantized_model, fullgraph=True) + + output_original = model(input_tensor) + output_quantized = quantized_model(input_tensor) + + error = compute_error(output_original, output_quantized) + self.assertGreater(error, 20, f"Quantization error is too high got a SQNR of {error}") @unittest.skipIf( not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_invalid_granularity(self): - with pytest.raises(ValueError, match="Invalid granularity specification"): + with self.assertRaisesRegex(ValueError, "Invalid granularity specification"): float8_dynamic_activation_float8_weight(granularity="invalid") @unittest.skipIf( not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_mismatched_granularity(self): - with pytest.raises( + with self.assertRaisesRegex( ValueError, - match="Different granularities for activation and weight are not supported", + "Different granularities for activation and weight are not supported", ): float8_dynamic_activation_float8_weight(granularity=(PerTensor(), PerRow())) @@ -167,7 +163,7 @@ def test_unsupported_granularity(self): class UnsupportedGranularity: pass - with pytest.raises(ValueError, match="Invalid granularity types"): + with self.assertRaisesRegex(ValueError, "Invalid granularity types"): float8_dynamic_activation_float8_weight( granularity=(UnsupportedGranularity(), UnsupportedGranularity()) ) @@ -177,9 +173,9 @@ class UnsupportedGranularity: not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_per_row_with_float32(self): - with pytest.raises( + with self.assertRaisesRegex( AssertionError, - match="PerRow quantization only works for bfloat16 precision", + "PerRow quantization only works for bfloat16 precision", ): model = ToyLinearModel(64, 64).eval().to(torch.float32).to("cuda") quantize_( @@ -317,4 +313,4 @@ def test_mm_float8dq(self): common_utils.instantiate_parametrized_tests(TestAffineQuantizedFloat8Compile) if __name__ == "__main__": - pytest.main([__file__]) + unittest.main() From 59ade08cd9e1ce7cedf2d083aa2c2b516b23ccbf Mon Sep 17 00:00:00 2001 From: V-E-D Date: Sun, 25 May 2025 11:05:38 +0530 Subject: [PATCH 2/3] test_bitpacking.py pytest too unittest --- test/dtypes/test_bitpacking.py | 139 +++++++++++++++++++-------------- 1 file changed, 82 insertions(+), 57 deletions(-) diff --git a/test/dtypes/test_bitpacking.py b/test/dtypes/test_bitpacking.py index 0ed4462d5d..80d3245401 100644 --- a/test/dtypes/test_bitpacking.py +++ b/test/dtypes/test_bitpacking.py @@ -3,9 +3,10 @@ # # This source code is licensed under the BSD 3-Clause license found in the # LICENSE file in the root directory of this source tree. 
-import pytest
+import unittest
 import torch
 from torch.utils._triton import has_triton
+from torch.testing._internal import common_utils

 from torchao.dtypes.uintx.bitpacking import pack, pack_cpu, unpack, unpack_cpu

@@ -13,68 +14,92 @@
 dimensions = (0, -1, 1)


-@pytest.fixture(autouse=True)
-def run_before_and_after_tests():
-    yield
-    torch._dynamo.reset()  # reset cache between tests
+class TestBitpacking(unittest.TestCase):
+    def setUp(self):
+        """Set up test fixtures before each test method."""
+        # Set random seeds for reproducibility
+        torch.manual_seed(0)
+        if torch.cuda.is_available():
+            torch.cuda.manual_seed(0)
+        
+        # Initialize any common test data
+        self.test_shape = (32, 32, 32)
+        
+        # Set default device
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        
+        # Reset any cached state
+        torch._dynamo.reset()
+
+    def tearDown(self):
+        """Clean up after each test method."""
+        # Reset torch._dynamo cache
+        torch._dynamo.reset()
+        
+        # Clear CUDA cache if available
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+        
+        # Reset any other state if needed
+        torch._dynamo.config.specialize_int = False

-@pytest.mark.parametrize("bit_width", bit_widths)
-@pytest.mark.parametrize("dim", dimensions)
-def test_CPU(bit_width, dim):
-    test_tensor = torch.randint(
-        0, 2**bit_width, (32, 32, 32), dtype=torch.uint8, device="cpu"
-    )
-    packed = pack_cpu(test_tensor, bit_width, dim=dim)
-    unpacked = unpack_cpu(packed, bit_width, dim=dim)
-    assert unpacked.allclose(test_tensor)
+    @common_utils.parametrize("bit_width", bit_widths)
+    @common_utils.parametrize("dim", dimensions)
+    def test_CPU(self, bit_width, dim):
+        test_tensor = torch.randint(
+            0, 2**bit_width, (32, 32, 32), dtype=torch.uint8, device="cpu"
+        )
+        packed = pack_cpu(test_tensor, bit_width, dim=dim)
+        unpacked = unpack_cpu(packed, bit_width, dim=dim)
+        self.assertTrue(unpacked.allclose(test_tensor))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    @common_utils.parametrize("bit_width", bit_widths)
+    @common_utils.parametrize("dim", dimensions)
+    def test_GPU(self, bit_width, dim):
+        test_tensor = torch.randint(0, 2**bit_width, (32, 32, 32), dtype=torch.uint8).cuda()
+        packed = pack(test_tensor, bit_width, dim=dim)
+        unpacked = unpack(packed, bit_width, dim=dim)
+        self.assertTrue(unpacked.allclose(test_tensor))

-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-@pytest.mark.parametrize("bit_width", bit_widths)
-@pytest.mark.parametrize("dim", dimensions)
-def test_GPU(bit_width, dim):
-    test_tensor = torch.randint(0, 2**bit_width, (32, 32, 32), dtype=torch.uint8).cuda()
-    packed = pack(test_tensor, bit_width, dim=dim)
-    unpacked = unpack(packed, bit_width, dim=dim)
-    assert unpacked.allclose(test_tensor)
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    @unittest.skipIf(not has_triton(), "unsupported without triton")
+    @common_utils.parametrize("bit_width", bit_widths)
+    @common_utils.parametrize("dim", dimensions)
+    def test_compile(self, bit_width, dim):
+        torch._dynamo.config.specialize_int = True
+        torch.compile(pack, fullgraph=True)
+        torch.compile(unpack, fullgraph=True)
+        test_tensor = torch.randint(0, 2**bit_width, (32, 32, 32), dtype=torch.uint8).cuda()
+        packed = pack(test_tensor, bit_width, dim=dim)
+        unpacked = unpack(packed, bit_width, dim=dim)
+        self.assertTrue(unpacked.allclose(test_tensor))
+
+    @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
+    def test_pack_example(self):
+        test_tensor = torch.tensor(
+            [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
+        ).cuda()
+        shard_4, shard_2 = pack(test_tensor, 6)
+        print(shard_4, shard_2)
+        self.assertTrue(torch.tensor([0, 105, 151, 37], dtype=torch.uint8).cuda().allclose(shard_4))
+        self.assertTrue(torch.tensor([39, 146], dtype=torch.uint8).cuda().allclose(shard_2))
+        unpacked = unpack([shard_4, shard_2], 6)
+        self.assertTrue(unpacked.allclose(test_tensor))

-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-@pytest.mark.skipif(not has_triton(), reason="unsupported without triton")
-@pytest.mark.parametrize("bit_width", bit_widths)
-@pytest.mark.parametrize("dim", dimensions)
-def test_compile(bit_width, dim):
-    torch._dynamo.config.specialize_int = True
-    torch.compile(pack, fullgraph=True)
-    torch.compile(unpack, fullgraph=True)
-    test_tensor = torch.randint(0, 2**bit_width, (32, 32, 32), dtype=torch.uint8).cuda()
-    packed = pack(test_tensor, bit_width, dim=dim)
-    unpacked = unpack(packed, bit_width, dim=dim)
-    assert unpacked.allclose(test_tensor)
+    def test_pack_example_CPU(self):
+        test_tensor = torch.tensor(
+            [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
+        )
+        shard_4, shard_2 = pack(test_tensor, 6)
+        print(shard_4, shard_2)
+        self.assertTrue(torch.tensor([0, 105, 151, 37], dtype=torch.uint8).allclose(shard_4))
+        self.assertTrue(torch.tensor([39, 146], dtype=torch.uint8).allclose(shard_2))
+        unpacked = unpack([shard_4, shard_2], 6)
+        self.assertTrue(unpacked.allclose(test_tensor))

-# these test cases are for the example pack walk through in the bitpacking.py file
-@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available")
-def test_pack_example():
-    test_tensor = torch.tensor(
-        [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
-    ).cuda()
-    shard_4, shard_2 = pack(test_tensor, 6)
-    print(shard_4, shard_2)
-    assert torch.tensor([0, 105, 151, 37], dtype=torch.uint8).cuda().allclose(shard_4)
-    assert torch.tensor([39, 146], dtype=torch.uint8).cuda().allclose(shard_2)
-    unpacked = unpack([shard_4, shard_2], 6)
-    assert unpacked.allclose(test_tensor)
+common_utils.instantiate_parametrized_tests(TestBitpacking)

-
-def test_pack_example_CPU():
-    test_tensor = torch.tensor(
-        [0x30, 0x29, 0x17, 0x5, 0x20, 0x16, 0x9, 0x22], dtype=torch.uint8
-    )
-    shard_4, shard_2 = pack(test_tensor, 6)
-    print(shard_4, shard_2)
-    assert torch.tensor([0, 105, 151, 37], dtype=torch.uint8).allclose(shard_4)
-    assert torch.tensor([39, 146], dtype=torch.uint8).allclose(shard_2)
-    unpacked = unpack([shard_4, shard_2], 6)
-    assert unpacked.allclose(test_tensor)
+if __name__ == "__main__":
+    unittest.main()

From e9c369ba3b97d8a221109c9ce6a4c2e7a45e3487 Mon Sep 17 00:00:00 2001
From: V-E-D
Date: Sun, 25 May 2025 11:08:20 +0530
Subject: [PATCH 3/3] precommit

---
 test/dtypes/test_affine_quantized_float.py |  5 ++--
 test/dtypes/test_bitpacking.py             | 33 ++++++++++++++--------
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py
index 7fa14be88c..b9488a0617 100644
--- a/test/dtypes/test_affine_quantized_float.py
+++ b/test/dtypes/test_affine_quantized_float.py
@@ -15,7 +15,6 @@
 import copy
 import io
 import random
-from contextlib import nullcontext
 from functools import partial
 from typing import Tuple

@@ -137,7 +136,9 @@ def _run_fp8_linear_variant(self, dtype, mode, compile, sizes, granularity):
         output_quantized = quantized_model(input_tensor)

         error = compute_error(output_original, output_quantized)
-        self.assertGreater(error, 20, f"Quantization error is too high, got a SQNR of {error}")
+        self.assertGreater(
+            error, 20, f"Quantization error is too high, got a SQNR of {error}"
+        )

     @unittest.skipIf(
         not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9"
     )
diff --git a/test/dtypes/test_bitpacking.py b/test/dtypes/test_bitpacking.py
index 80d3245401..e91ad627d2 100644
--- a/test/dtypes/test_bitpacking.py
+++ b/test/dtypes/test_bitpacking.py
@@ -4,9 +4,10 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
 import unittest
+
 import torch
-from torch.utils._triton import has_triton
 from torch.testing._internal import common_utils
+from torch.utils._triton import has_triton

 from torchao.dtypes.uintx.bitpacking import pack, pack_cpu, unpack, unpack_cpu

@@ -21,13 +22,13 @@ def setUp(self):
         torch.manual_seed(0)
         if torch.cuda.is_available():
             torch.cuda.manual_seed(0)
-        
+
         # Initialize any common test data
         self.test_shape = (32, 32, 32)
-        
+
         # Set default device
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        
+
         # Reset any cached state
         torch._dynamo.reset()

@@ -35,11 +36,11 @@ def tearDown(self):
         """Clean up after each test method."""
         # Reset torch._dynamo cache
         torch._dynamo.reset()
-        
+
         # Clear CUDA cache if available
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
-        
+
         # Reset any other state if needed
         torch._dynamo.config.specialize_int = False

@@ -57,7 +58,9 @@ def test_CPU(self, bit_width, dim):
     @common_utils.parametrize("bit_width", bit_widths)
     @common_utils.parametrize("dim", dimensions)
     def test_GPU(self, bit_width, dim):
-        test_tensor = torch.randint(0, 2**bit_width, (32, 32, 32), dtype=torch.uint8).cuda()
+        test_tensor = torch.randint(
+            0, 2**bit_width, (32, 32, 32), dtype=torch.uint8
+        ).cuda()
         packed = pack(test_tensor, bit_width, dim=dim)
         unpacked = unpack(packed, bit_width, dim=dim)
         self.assertTrue(unpacked.allclose(test_tensor))
@@ -70,7 +73,9 @@ def test_compile(self, bit_width, dim):
         torch._dynamo.config.specialize_int = True
         torch.compile(pack, fullgraph=True)
         torch.compile(unpack, fullgraph=True)
-        test_tensor = torch.randint(0, 2**bit_width, (32, 32, 32), dtype=torch.uint8).cuda()
+        test_tensor = torch.randint(
+            0, 2**bit_width, (32, 32, 32), dtype=torch.uint8
+        ).cuda()
         packed = pack(test_tensor, bit_width, dim=dim)
         unpacked = unpack(packed, bit_width, dim=dim)
         self.assertTrue(unpacked.allclose(test_tensor))
@@ -82,8 +87,12 @@ def test_pack_example(self):
         ).cuda()
         shard_4, shard_2 = pack(test_tensor, 6)
         print(shard_4, shard_2)
-        self.assertTrue(torch.tensor([0, 105, 151, 37], dtype=torch.uint8).cuda().allclose(shard_4))
-        self.assertTrue(torch.tensor([39, 146], dtype=torch.uint8).cuda().allclose(shard_2))
+        self.assertTrue(
+            torch.tensor([0, 105, 151, 37], dtype=torch.uint8).cuda().allclose(shard_4)
+        )
+        self.assertTrue(
+            torch.tensor([39, 146], dtype=torch.uint8).cuda().allclose(shard_2)
+        )
         unpacked = unpack([shard_4, shard_2], 6)
         self.assertTrue(unpacked.allclose(test_tensor))

@@ -93,7 +102,9 @@ def test_pack_example_CPU(self):
         )
         shard_4, shard_2 = pack(test_tensor, 6)
         print(shard_4, shard_2)
-        self.assertTrue(torch.tensor([0, 105, 151, 37], dtype=torch.uint8).allclose(shard_4))
+        self.assertTrue(
+            torch.tensor([0, 105, 151, 37], dtype=torch.uint8).allclose(shard_4)
+        )
         self.assertTrue(torch.tensor([39, 146], dtype=torch.uint8).allclose(shard_2))
         unpacked = unpack([shard_4, shard_2], 6)
         self.assertTrue(unpacked.allclose(test_tensor))