From 8a7153fcb27cb00ec942ba6c498cf43b4eeabb93 Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 11:34:24 +0800 Subject: [PATCH 01/10] enable test/dtypes/test_affine_quantized.py on intel XPU --- test/dtypes/test_affine_quantized.py | 57 ++++++++++++++-------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/test/dtypes/test_affine_quantized.py b/test/dtypes/test_affine_quantized.py index 9679f6975a..e0669a576f 100644 --- a/test/dtypes/test_affine_quantized.py +++ b/test/dtypes/test_affine_quantized.py @@ -42,15 +42,16 @@ is_fbcode, is_ROCM, is_sm_at_least_89, + get_current_accelerator_device, ) is_cusparselt_available = ( hasattr(torch.backends, "cusparselt") and torch.backends.cusparselt.is_available() ) - +_DEVICE = get_current_accelerator_device() def get_quantization_functions( - do_sparse: bool, do_int4: bool, device: str = "cuda", int4_zp_int: bool = False + do_sparse: bool, do_int4: bool, device: str = _DEVICE, int4_zp_int: bool = False ): base_functions = [ Int8WeightOnlyConfig(), @@ -105,9 +106,9 @@ class TestAffineQuantized(TestCase): ["xpu"] if torch.xpu.is_available() else [] ) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") def test_tensor_core_layout_transpose(self): - linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda") + linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device=_DEVICE) t = linear.weight shape = t.shape apply_int4_weight_only_quant = Int4WeightOnlyConfig(group_size=32, version=1) @@ -169,7 +170,7 @@ def _apply(module, config_or_subclass_inserter): ql = _apply(linear, apply_quant) ql.to(device) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") def test_register_new_dispatch(self): from torchao.dtypes import AffineQuantizedTensor from torchao.dtypes.affine_quantized_tensor_ops import ( @@ -206,10 +207,10 @@ def apply_uint6_weight_only_quant(linear): ) return linear - linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda") + linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device=_DEVICE) apply_uint6_weight_only_quant(linear) - example_input = torch.randn(1, 128, dtype=torch.bfloat16, device="cuda") + example_input = torch.randn(1, 128, dtype=torch.bfloat16, device=_DEVICE) with self.assertRaisesRegex( AssertionError, "dispatching to my impl for uint6 weight only quant" ): @@ -232,13 +233,13 @@ def test_print_quantized_module(self): ql = apply_quant(linear) assert "AffineQuantizedTensor" in str(ql) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @common_utils.parametrize( - "apply_quant", get_quantization_functions(False, True, "cuda", False) + "apply_quant", get_quantization_functions(False, True, _DEVICE, False) ) def test_test_copy__apply(self, apply_quant): - linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda") - linear2 = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda") + linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device=_DEVICE) + linear2 = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device=_DEVICE) if isinstance(apply_quant, AOBaseConfig): quantize_(linear, apply_quant) @@ -249,20 +250,20 @@ def test_test_copy__apply(self, apply_quant): ql = apply_quant(linear) ql2 = apply_quant(linear2) - example_input = 
torch.randn(1, 128, dtype=torch.bfloat16, device="cuda") + example_input = torch.randn(1, 128, dtype=torch.bfloat16, device=_DEVICE) output = ql(example_input) ql2.weight.copy_(ql.weight) ql2.bias = ql.bias output2 = ql2(example_input) self.assertEqual(output, output2) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @common_utils.parametrize( - "apply_quant", get_quantization_functions(False, True, "cuda", False) + "apply_quant", get_quantization_functions(False, True, _DEVICE, False) ) def test_copy__mismatch_metadata(self, apply_quant): - linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device="cuda") - linear2 = torch.nn.Linear(128, 512, dtype=torch.bfloat16, device="cuda") + linear = torch.nn.Linear(128, 256, dtype=torch.bfloat16, device=_DEVICE) + linear2 = torch.nn.Linear(128, 512, dtype=torch.bfloat16, device=_DEVICE) if isinstance(apply_quant, AOBaseConfig): quantize_(linear, apply_quant) @@ -336,9 +337,9 @@ def test_alias(self, device, dtype): quantize_(dummy, Int8DynamicActivationInt8WeightConfig()) _ = dummy.weight[...] - @common_utils.parametrize("device", ["cuda"]) + @common_utils.parametrize("device", [_DEVICE]) @common_utils.parametrize("dtype", [torch.bfloat16]) - @skip_if_no_cuda() + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @skip_if_rocm("ROCm enablement in progress") def test_slice_int4wo(self, device, dtype): # in_feature not divisible by 1024 @@ -350,9 +351,9 @@ def test_slice_int4wo(self, device, dtype): _ = dummy.weight.narrow(0, 0, 64) _ = dummy.weight.narrow(1, 0, 128) - @common_utils.parametrize("device", ["cuda"]) + @common_utils.parametrize("device", [_DEVICE]) @common_utils.parametrize("dtype", [torch.float16, torch.bfloat16]) - @skip_if_no_cuda() + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @skip_if_no_gemlite() def test_slice_gemlite(self, device, dtype): # in_feature not divisible by 1024 @@ -433,7 +434,7 @@ def dequant(input_layer, in_features, orig_shape): ) self.assertEqual((W_slice_ref - W_slice).abs().mean().item(), 0) - @common_utils.parametrize("device", ["cuda"]) + @common_utils.parametrize("device", [_DEVICE]) @common_utils.parametrize("dtype", [torch.bfloat16]) def test_matmul(self, device, dtype): x = torch.randn(53, 2048) @@ -450,14 +451,14 @@ def test_matmul(self, device, dtype): # make sure it runs torch.matmul(x, w.t()) - @common_utils.parametrize("device", ["cuda"]) + @common_utils.parametrize("device", [_DEVICE]) @common_utils.parametrize("dtype", [torch.bfloat16]) - @skip_if_no_cuda() + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @skip_if_rocm("ROCm enablement in progress") def test_slice_and_copy_int4wo(self, device, dtype): - l = torch.nn.Linear(1024, 1024).to("cuda").to(torch.bfloat16) + l = torch.nn.Linear(1024, 1024).to(_DEVICE).to(torch.bfloat16) l.weight = torch.nn.Parameter( - torch.zeros(1024, 1024, dtype=torch.bfloat16, device="cuda") + torch.zeros(1024, 1024, dtype=torch.bfloat16, device=_DEVICE) ) quantize_(l, Int4WeightOnlyConfig(version=1)) param = l.weight @@ -474,7 +475,7 @@ def test_slice_and_copy_int4wo(self, device, dtype): assert param.data.dequantize()[0][0] == 0 # dummy_l has random input (shouldn't be 0) - dummy_l = torch.nn.Linear(1024, 1024).to("cuda").to(torch.bfloat16) + dummy_l = torch.nn.Linear(1024, 1024).to(_DEVICE).to(torch.bfloat16) quantize_(dummy_l, Int4WeightOnlyConfig(version=1)) quantized = 
dummy_l.weight quantized = quantized.narrow(0, 0, 512) @@ -484,9 +485,9 @@ def test_slice_and_copy_int4wo(self, device, dtype): # making sure param.data is updated assert param.data.dequantize()[0][0] != 0 - @common_utils.parametrize("device", ["cuda"]) + @common_utils.parametrize("device", [_DEVICE]) @common_utils.parametrize("dtype", [torch.bfloat16]) - @skip_if_no_cuda() + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @skip_if_rocm("ROCm enablement in progress") def test_mm_int4wo(self, device, dtype): weight = torch.randn(512, 1024).to(device).to(dtype) From fc23fba78de7e9dfe96803137aca999fc4291bfc Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 12:10:35 +0800 Subject: [PATCH 02/10] enable test/dtypes/test_affine_quantized.py on intel XPU --- test/dtypes/test_affine_quantized.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/dtypes/test_affine_quantized.py b/test/dtypes/test_affine_quantized.py index e0669a576f..416e2547df 100644 --- a/test/dtypes/test_affine_quantized.py +++ b/test/dtypes/test_affine_quantized.py @@ -233,7 +233,7 @@ def test_print_quantized_module(self): ql = apply_quant(linear) assert "AffineQuantizedTensor" in str(ql) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @common_utils.parametrize( "apply_quant", get_quantization_functions(False, True, _DEVICE, False) ) @@ -339,7 +339,7 @@ def test_alias(self, device, dtype): @common_utils.parametrize("device", [_DEVICE]) @common_utils.parametrize("dtype", [torch.bfloat16]) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @skip_if_no_cuda() @skip_if_rocm("ROCm enablement in progress") def test_slice_int4wo(self, device, dtype): # in_feature not divisible by 1024 @@ -453,7 +453,7 @@ def test_matmul(self, device, dtype): @common_utils.parametrize("device", [_DEVICE]) @common_utils.parametrize("dtype", [torch.bfloat16]) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @skip_if_no_cuda() @skip_if_rocm("ROCm enablement in progress") def test_slice_and_copy_int4wo(self, device, dtype): l = torch.nn.Linear(1024, 1024).to(_DEVICE).to(torch.bfloat16) From c1d33ccb6080d89043ce9a13bff4e10f08fdbb1b Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 12:25:58 +0800 Subject: [PATCH 03/10] enable test/dtypes/test_affine_quantized_float.py on intel XPU --- test/dtypes/test_affine_quantized_float.py | 95 +++++++++++----------- 1 file changed, 48 insertions(+), 47 deletions(-) diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py index 738e9b6164..4ab630f125 100644 --- a/test/dtypes/test_affine_quantized_float.py +++ b/test/dtypes/test_affine_quantized_float.py @@ -42,11 +42,12 @@ is_sm_at_least_89, is_sm_at_least_90, is_sm_version, + get_current_accelerator_device, ) random.seed(0) torch.manual_seed(0) - +_DEVICE = get_current_accelerator_device() class ToyLinearModel(torch.nn.Module): def __init__(self, in_features, out_features): @@ -61,7 +62,7 @@ def forward(self, x): class TestAffineQuantizedFloat8Compile(InductorTestCase): - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @@ -97,7 +98,7 @@ def test_fp8_linear_variants( with 
error_context: M, N, K = sizes - input_tensor = torch.randn(*M, K, dtype=dtype, device="cuda") + input_tensor = torch.randn(*M, K, dtype=dtype, device=_DEVICE) # Get a "reasonable" scale for the input tensor even though # we use the same scale for multiple activations scale, _ = choose_qparams_affine( @@ -122,7 +123,7 @@ def test_fp8_linear_variants( } # Create a linear layer with bfloat16 dtype - model = ToyLinearModel(K, N).eval().to(dtype).to("cuda") + model = ToyLinearModel(K, N).eval().to(dtype).to(_DEVICE) quantized_model = copy.deepcopy(model) factory = mode_map[mode]() @@ -170,29 +171,29 @@ class UnsupportedGranularity: granularity=(UnsupportedGranularity(), UnsupportedGranularity()), ) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_per_row_with_float32(self): with pytest.raises( AssertionError, match="PerRow quantization only works for bfloat16 precision", ): - model = ToyLinearModel(64, 64).eval().to(torch.float32).to("cuda") + model = ToyLinearModel(64, 64).eval().to(torch.float32).to(_DEVICE) quantize_( model, Float8DynamicActivationFloat8WeightConfig(granularity=PerRow()), ) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("mode", ["dynamic", "weight-only", "static"]) def test_serialization(self, mode: str): # Create and quantize the model - model = ToyLinearModel(16, 32).to(device="cuda") + model = ToyLinearModel(16, 32).to(device=_DEVICE) mode_map = { "dynamic": partial( @@ -203,7 +204,7 @@ def test_serialization(self, mode: str): "weight-only": partial(Float8WeightOnlyConfig, version=1), "static": partial( Float8StaticActivationFloat8WeightConfig, - scale=torch.tensor(1.0, dtype=torch.float32, device="cuda"), + scale=torch.tensor(1.0, dtype=torch.float32, device=_DEVICE), granularity=PerTensor(), ), } @@ -260,13 +261,13 @@ def test_serialization(self, mode: str): original_layer.weight.scale, new_layer.weight.scale ), f"Scales do not match for {layer_name}" - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_fp8_weight_dimension_warning(self): # Create model with incompatible dimensions (not multiples of 16) - model = ToyLinearModel(10, 25).cuda() # 10x25 and 25x10 weights + model = ToyLinearModel(10, 25).to(_DEVICE) # 10x25 and 25x10 weights # Set up logging capture with self.assertLogs( @@ -298,9 +299,9 @@ def test_fp8_weight_dimension_warning(self): f"Expected warning message containing: {expected}", ) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not 
is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize( "in_features,out_features", [(512, 1024), (256, 768), (1024, 512)] @@ -312,7 +313,7 @@ def test_fp8_weight_dimension_warning(self): def test_mm_float8dq_per_row( self, in_features, out_features, leading_shape, bias: bool ): - device = "cuda" + device = _DEVICE dtype = torch.bfloat16 input_shape = leading_shape + (in_features,) @@ -353,15 +354,15 @@ def test_mm_float8dq_per_row( error = compute_error(ref_output, quant_output) assert error > 20, f"Quantization error is too high got a SQNR of {error}" - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) @common_utils.parametrize("output_dtype", [torch.float32, torch.bfloat16]) def test_choose_scale_float8_bounds(self, float8_dtype, output_dtype): block_size = () - device = "cuda" + device = _DEVICE input_tensor = torch.randn(8, 64, device=device, dtype=torch.float32) # testing upper bounds @@ -396,9 +397,9 @@ def test_choose_scale_float8_bounds(self, float8_dtype, output_dtype): # since scale = abs_max / quant_max, larger abs_max means scale is larger self.assertTrue(scale_ref < scale_with_lb) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) @common_utils.parametrize("output_dtype", [torch.float32, torch.bfloat16]) @@ -406,7 +407,7 @@ def test_choose_scale_float8_bounds(self, float8_dtype, output_dtype): def test_dequantize_affine_float8(self, float8_dtype, output_dtype, block_size): """Test _dequantize_affine_float8 with various configurations""" - device = "cuda" + device = _DEVICE input_tensor = torch.randn(8, 64, device=device, dtype=torch.float32) # Choose quantization parameters @@ -429,13 +430,13 @@ def test_dequantize_affine_float8(self, float8_dtype, output_dtype, block_size): error = torch.abs(input_tensor.to(output_dtype) - dequantized).mean() self.assertLess(error, 0.1, "Quantization error too high") - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_dequantize_affine_float8_scale_broadcasting(self): """Test that scale broadcasting works correctly for block-wise quantization""" - device = "cuda" + device = _DEVICE # Create input tensor with known block structure input_tensor = torch.randn(4, 32, device=device, dtype=torch.float32) block_size = (2, 16) # 2x2 blocks in first dim, 2x16 blocks in second dim @@ -461,14 +462,14 @@ def test_dequantize_affine_float8_scale_broadcasting(self): # Verify shapes match self.assertEqual(dequantized.shape, input_tensor.shape) - @unittest.skipIf(not torch.cuda.is_available(), 
"Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("granularity", [PerTensor(), PerRow()]) def test_float8_tensor_slicing_basic(self, granularity): """Test basic slicing operations on Float8 tensors""" - device = "cuda" + device = _DEVICE dtype = torch.bfloat16 # Create and quantize a model @@ -499,13 +500,13 @@ def test_float8_tensor_slicing_basic(self, granularity): self.assertTrue(isinstance(sliced_1, Float8AQTTensorImpl)) self.assertTrue(isinstance(sliced_both, Float8AQTTensorImpl)) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_float8_tensor_slicing_per_tensor(self): """Test slicing with per-tensor quantization (scale should not change)""" - device = "cuda" + device = _DEVICE dtype = torch.bfloat16 # Create and quantize with per-tensor granularity @@ -529,17 +530,17 @@ def test_float8_tensor_slicing_per_tensor(self): self.assertTrue(torch.equal(original_scale, sliced_impl.scale)) self.assertEqual(sliced_impl.scale.numel(), 1) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @unittest.skipIf( - not is_sm_at_least_90(), + _DEVICE == "cuda" and not is_sm_at_least_90(), "Per-row quantization requires compute capability >= 9.0", ) def test_float8_tensor_slicing_per_row(self): """Test slicing with per-row quantization (scale should be sliced appropriately)""" - device = "cuda" + device = _DEVICE dtype = torch.bfloat16 # Create and quantize with per-row granularity @@ -572,13 +573,13 @@ def test_float8_tensor_slicing_per_row(self): self.assertEqual(sliced_cols_impl.scale.shape, (32, 1)) self.assertTrue(torch.equal(sliced_cols_impl.scale, original_scale)) - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_float8_tensor_slicing_edge_cases(self): """Test edge cases in slicing""" - device = "cuda" + device = _DEVICE dtype = torch.bfloat16 # Create and quantize a model @@ -604,18 +605,18 @@ def test_float8_tensor_slicing_edge_cases(self): large_slice = original_weight[:100] # More than available rows self.assertEqual(large_slice.shape, (32, 64)) # Should clamp to available - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("granularity", [PerTensor(), 
PerRow()]) @unittest.skipIf( - is_sm_version(8, 9), + _DEVICE == "cuda" and is_sm_version(8, 9), "TODO: AssertionError: tensor(-2.1562, device='cuda:0', dtype=torch.bfloat16) not greater than 15", ) def test_float8_tensor_slicing_functional_correctness(self, granularity): """Test that sliced tensors produce correct results in computations""" - device = "cuda" + device = _DEVICE dtype = torch.bfloat16 # Create reference and quantized models with dimensions that are multiples of 16 @@ -776,9 +777,9 @@ def test_quantize_dequantize_fp8_inductor(self, float8_dtype, hp_dtype): torch.testing.assert_close(expected_dequantized, test_dq) @torch.no_grad() - @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - not is_sm_at_least_90(), "Requires GPU with compute capability >= 9.0" + _DEVICE == "cuda" and not is_sm_at_least_90(), "Requires GPU with compute capability >= 9.0" ) @common_utils.parametrize("granularity", [PerTensor(), PerRow()]) @common_utils.parametrize("float8_config_version", [1, 2]) @@ -791,7 +792,7 @@ def test_expected_kernels_on_gpu(self, granularity, float8_config_version): M, K, N = 128, 256, 512 m = torch.nn.Sequential( - torch.nn.Linear(K, N, device="cuda", dtype=torch.bfloat16) + torch.nn.Linear(K, N, device=_DEVICE, dtype=torch.bfloat16) ) if float8_config_version == 1: config = Float8DynamicActivationFloat8WeightConfig( @@ -810,7 +811,7 @@ def test_expected_kernels_on_gpu(self, granularity, float8_config_version): ) m = torch.compile(m) - x = torch.randn(M, K, device="cuda", dtype=torch.bfloat16) + x = torch.randn(M, K, device=_DEVICE, dtype=torch.bfloat16) out, code = run_and_get_code(m, x) # triton kernel call looks like: From 96ae48e97ebeb8087dba7d44f020e7eb21ac53a1 Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 13:15:44 +0800 Subject: [PATCH 04/10] enable test/dtypes/test_affine_quantized_float.py on intel XPU --- test/dtypes/test_affine_quantized_float.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py index 4ab630f125..9ef2a06574 100644 --- a/test/dtypes/test_affine_quantized_float.py +++ b/test/dtypes/test_affine_quantized_float.py @@ -299,9 +299,9 @@ def test_fp8_weight_dimension_warning(self): f"Expected warning message containing: {expected}", ) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize( "in_features,out_features", [(512, 1024), (256, 768), (1024, 512)] @@ -462,9 +462,9 @@ def test_dequantize_affine_float8_scale_broadcasting(self): # Verify shapes match self.assertEqual(dequantized.shape, input_tensor.shape) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("granularity", [PerTensor(), PerRow()]) def test_float8_tensor_slicing_basic(self, granularity): @@ -777,9 +777,9 @@ def 
test_quantize_dequantize_fp8_inductor(self, float8_dtype, hp_dtype): torch.testing.assert_close(expected_dequantized, test_dq) @torch.no_grad() - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_90(), "Requires GPU with compute capability >= 9.0" + not is_sm_at_least_90(), "Requires GPU with compute capability >= 9.0" ) @common_utils.parametrize("granularity", [PerTensor(), PerRow()]) @common_utils.parametrize("float8_config_version", [1, 2]) From 32df181678eee5a57eebd7fa5b82dbae6be2055c Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 13:21:46 +0800 Subject: [PATCH 05/10] enable test/dtypes/test_affine_quantized_float.py on intel XPU --- test/dtypes/test_affine_quantized_float.py | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py index 9ef2a06574..40e1620536 100644 --- a/test/dtypes/test_affine_quantized_float.py +++ b/test/dtypes/test_affine_quantized_float.py @@ -186,9 +186,9 @@ def test_per_row_with_float32(self): Float8DynamicActivationFloat8WeightConfig(granularity=PerRow()), ) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("mode", ["dynamic", "weight-only", "static"]) def test_serialization(self, mode: str): @@ -500,9 +500,9 @@ def test_float8_tensor_slicing_basic(self, granularity): self.assertTrue(isinstance(sliced_1, Float8AQTTensorImpl)) self.assertTrue(isinstance(sliced_both, Float8AQTTensorImpl)) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_float8_tensor_slicing_per_tensor(self): """Test slicing with per-tensor quantization (scale should not change)""" @@ -530,12 +530,12 @@ def test_float8_tensor_slicing_per_tensor(self): self.assertTrue(torch.equal(original_scale, sliced_impl.scale)) self.assertEqual(sliced_impl.scale.numel(), 1) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_90(), + not is_sm_at_least_90(), "Per-row quantization requires compute capability >= 9.0", ) def test_float8_tensor_slicing_per_row(self): @@ -573,9 +573,9 @@ def test_float8_tensor_slicing_per_row(self): self.assertEqual(sliced_cols_impl.scale.shape, (32, 1)) self.assertTrue(torch.equal(sliced_cols_impl.scale, original_scale)) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not 
is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_float8_tensor_slicing_edge_cases(self): """Test edge cases in slicing""" @@ -605,13 +605,13 @@ def test_float8_tensor_slicing_edge_cases(self): large_slice = original_weight[:100] # More than available rows self.assertEqual(large_slice.shape, (32, 64)) # Should clamp to available - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("granularity", [PerTensor(), PerRow()]) @unittest.skipIf( - _DEVICE == "cuda" and is_sm_version(8, 9), + is_sm_version(8, 9), "TODO: AssertionError: tensor(-2.1562, device='cuda:0', dtype=torch.bfloat16) not greater than 15", ) def test_float8_tensor_slicing_functional_correctness(self, granularity): From 2ed02da7899df3026e9d0cd64169a42a44f43aa8 Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 13:24:19 +0800 Subject: [PATCH 06/10] enable test/dtypes/test_affine_quantized_float.py on intel XPU --- test/dtypes/test_affine_quantized_float.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py index 40e1620536..4d3d9d0e2a 100644 --- a/test/dtypes/test_affine_quantized_float.py +++ b/test/dtypes/test_affine_quantized_float.py @@ -171,9 +171,9 @@ class UnsupportedGranularity: granularity=(UnsupportedGranularity(), UnsupportedGranularity()), ) - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_per_row_with_float32(self): with pytest.raises( @@ -261,9 +261,9 @@ def test_serialization(self, mode: str): original_layer.weight.scale, new_layer.weight.scale ), f"Scales do not match for {layer_name}" - @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") + @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_fp8_weight_dimension_warning(self): # Create model with incompatible dimensions (not multiples of 16) From 234f13a85875a891362d6a9af0fadbaf58bc452b Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 13:30:38 +0800 Subject: [PATCH 07/10] enable test/dtypes/test_affine_quantized_float.py on intel XPU --- test/dtypes/test_affine_quantized_float.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py index 4d3d9d0e2a..3d87f7e884 100644 --- a/test/dtypes/test_affine_quantized_float.py +++ b/test/dtypes/test_affine_quantized_float.py @@ -141,14 +141,14 @@ def test_fp8_linear_variants( ) @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU 
with compute capability >= 8.9" ) def test_invalid_granularity(self): with pytest.raises(ValueError, match="Invalid granularity specification"): Float8DynamicActivationFloat8WeightConfig(granularity="invalid") @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_mismatched_granularity(self): with pytest.raises( @@ -160,7 +160,7 @@ def test_mismatched_granularity(self): ) @unittest.skipIf( - not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_unsupported_granularity(self): class UnsupportedGranularity: From 4885a1acecc59fa511256efc869411ab06fca38b Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 13:32:10 +0800 Subject: [PATCH 08/10] fix format issue --- test/dtypes/test_affine_quantized.py | 1 + test/dtypes/test_affine_quantized_float.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/test/dtypes/test_affine_quantized.py b/test/dtypes/test_affine_quantized.py index 416e2547df..3d33374924 100644 --- a/test/dtypes/test_affine_quantized.py +++ b/test/dtypes/test_affine_quantized.py @@ -50,6 +50,7 @@ ) _DEVICE = get_current_accelerator_device() + def get_quantization_functions( do_sparse: bool, do_int4: bool, device: str = _DEVICE, int4_zp_int: bool = False ): diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py index 3d87f7e884..2d1a642c42 100644 --- a/test/dtypes/test_affine_quantized_float.py +++ b/test/dtypes/test_affine_quantized_float.py @@ -49,6 +49,7 @@ torch.manual_seed(0) _DEVICE = get_current_accelerator_device() + class ToyLinearModel(torch.nn.Module): def __init__(self, in_features, out_features): super().__init__() @@ -141,14 +142,16 @@ def test_fp8_linear_variants( ) @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) def test_invalid_granularity(self): with pytest.raises(ValueError, match="Invalid granularity specification"): Float8DynamicActivationFloat8WeightConfig(granularity="invalid") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) def test_mismatched_granularity(self): with pytest.raises( @@ -160,7 +163,8 @@ def test_mismatched_granularity(self): ) @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) def test_unsupported_granularity(self): class UnsupportedGranularity: @@ -356,7 +360,8 @@ def test_mm_float8dq_per_row( @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) @common_utils.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) @common_utils.parametrize("output_dtype", [torch.float32, torch.bfloat16]) @@ -399,7 +404,8 @@ def test_choose_scale_float8_bounds(self, 
float8_dtype, output_dtype): @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) @common_utils.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) @common_utils.parametrize("output_dtype", [torch.float32, torch.bfloat16]) @@ -432,7 +438,8 @@ def test_dequantize_affine_float8(self, float8_dtype, output_dtype, block_size): @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) def test_dequantize_affine_float8_scale_broadcasting(self): """Test that scale broadcasting works correctly for block-wise quantization""" From 4e46f122caacbf44dcc74f85b15715fb8a95286e Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 13:37:48 +0800 Subject: [PATCH 09/10] fix format issue --- test/dtypes/test_affine_quantized.py | 3 +-- test/dtypes/test_affine_quantized_float.py | 21 +++++++-------------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/test/dtypes/test_affine_quantized.py b/test/dtypes/test_affine_quantized.py index 3d33374924..00cfa272d8 100644 --- a/test/dtypes/test_affine_quantized.py +++ b/test/dtypes/test_affine_quantized.py @@ -39,10 +39,10 @@ from torchao.utils import ( check_cpu_version, check_xpu_version, + get_current_accelerator_device, is_fbcode, is_ROCM, is_sm_at_least_89, - get_current_accelerator_device, ) is_cusparselt_available = ( @@ -50,7 +50,6 @@ ) _DEVICE = get_current_accelerator_device() - def get_quantization_functions( do_sparse: bool, do_int4: bool, device: str = _DEVICE, int4_zp_int: bool = False ): diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py index 2d1a642c42..c75e29b66b 100644 --- a/test/dtypes/test_affine_quantized_float.py +++ b/test/dtypes/test_affine_quantized_float.py @@ -39,17 +39,16 @@ ) from torchao.quantization.quantize_.common import KernelPreference from torchao.utils import ( + get_current_accelerator_device, is_sm_at_least_89, is_sm_at_least_90, is_sm_version, - get_current_accelerator_device, ) random.seed(0) torch.manual_seed(0) _DEVICE = get_current_accelerator_device() - class ToyLinearModel(torch.nn.Module): def __init__(self, in_features, out_features): super().__init__() @@ -142,16 +141,14 @@ def test_fp8_linear_variants( ) @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), - "Requires GPU with compute capability >= 8.9", + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_invalid_granularity(self): with pytest.raises(ValueError, match="Invalid granularity specification"): Float8DynamicActivationFloat8WeightConfig(granularity="invalid") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), - "Requires GPU with compute capability >= 8.9", + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_mismatched_granularity(self): with pytest.raises( @@ -163,8 +160,7 @@ def test_mismatched_granularity(self): ) @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), - "Requires GPU with compute capability >= 8.9", + _DEVICE == "cuda" and not 
is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_unsupported_granularity(self): class UnsupportedGranularity: @@ -360,8 +356,7 @@ def test_mm_float8dq_per_row( @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), - "Requires GPU with compute capability >= 8.9", + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) @common_utils.parametrize("output_dtype", [torch.float32, torch.bfloat16]) @@ -404,8 +399,7 @@ def test_choose_scale_float8_bounds(self, float8_dtype, output_dtype): @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), - "Requires GPU with compute capability >= 8.9", + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) @common_utils.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) @common_utils.parametrize("output_dtype", [torch.float32, torch.bfloat16]) @@ -438,8 +432,7 @@ def test_dequantize_affine_float8(self, float8_dtype, output_dtype, block_size): @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), - "Requires GPU with compute capability >= 8.9", + _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" ) def test_dequantize_affine_float8_scale_broadcasting(self): """Test that scale broadcasting works correctly for block-wise quantization""" From 060c9e9370bc00518481af659df0bbab7e88889b Mon Sep 17 00:00:00 2001 From: "Zeng, Xiangdong" Date: Fri, 21 Nov 2025 13:39:40 +0800 Subject: [PATCH 10/10] fix format issue --- test/dtypes/test_affine_quantized.py | 1 + test/dtypes/test_affine_quantized_float.py | 19 +++++++++++++------ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/test/dtypes/test_affine_quantized.py b/test/dtypes/test_affine_quantized.py index 00cfa272d8..20cf1c6311 100644 --- a/test/dtypes/test_affine_quantized.py +++ b/test/dtypes/test_affine_quantized.py @@ -50,6 +50,7 @@ ) _DEVICE = get_current_accelerator_device() + def get_quantization_functions( do_sparse: bool, do_int4: bool, device: str = _DEVICE, int4_zp_int: bool = False ): diff --git a/test/dtypes/test_affine_quantized_float.py b/test/dtypes/test_affine_quantized_float.py index c75e29b66b..54820cb5b3 100644 --- a/test/dtypes/test_affine_quantized_float.py +++ b/test/dtypes/test_affine_quantized_float.py @@ -49,6 +49,7 @@ torch.manual_seed(0) _DEVICE = get_current_accelerator_device() + class ToyLinearModel(torch.nn.Module): def __init__(self, in_features, out_features): super().__init__() @@ -141,14 +142,16 @@ def test_fp8_linear_variants( ) @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) def test_invalid_granularity(self): with pytest.raises(ValueError, match="Invalid granularity specification"): Float8DynamicActivationFloat8WeightConfig(granularity="invalid") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) def 
test_mismatched_granularity(self): with pytest.raises( @@ -160,7 +163,8 @@ def test_mismatched_granularity(self): ) @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) def test_unsupported_granularity(self): class UnsupportedGranularity: @@ -356,7 +360,8 @@ def test_mm_float8dq_per_row( @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) @common_utils.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) @common_utils.parametrize("output_dtype", [torch.float32, torch.bfloat16]) @@ -399,7 +404,8 @@ def test_choose_scale_float8_bounds(self, float8_dtype, output_dtype): @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) @common_utils.parametrize("float8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2]) @common_utils.parametrize("output_dtype", [torch.float32, torch.bfloat16]) @@ -432,7 +438,8 @@ def test_dequantize_affine_float8(self, float8_dtype, output_dtype, block_size): @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available") @unittest.skipIf( - _DEVICE == "cuda" and not is_sm_at_least_89(), "Requires GPU with compute capability >= 8.9" + _DEVICE == "cuda" and not is_sm_at_least_89(), + "Requires GPU with compute capability >= 8.9", ) def test_dequantize_affine_float8_scale_broadcasting(self): """Test that scale broadcasting works correctly for block-wise quantization"""
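
Note: every patch in this series imports get_current_accelerator_device from
torchao.utils, but the helper's definition is not part of these diffs. For
readers following along, here is a minimal sketch of what it is assumed to do,
inferred from how _DEVICE is used in the tests ("cuda" on NVIDIA, "xpu" on
Intel); the actual torchao implementation may differ:

    import torch

    def get_current_accelerator_device() -> str:
        """Return the device type string for the current accelerator.

        Relies on the same torch.accelerator API that the patched skipIf
        checks use, and falls back to "cpu" when no accelerator backend
        is available.
        """
        if torch.accelerator.is_available():
            # current_accelerator() returns a torch.device such as
            # device(type="cuda") or device(type="xpu"); .type yields the
            # plain string the tests pass as device=...
            return torch.accelerator.current_accelerator().type
        return "cpu"

With a helper along these lines, the module-level constant
_DEVICE = get_current_accelerator_device() resolves to "xpu" on Intel GPUs and
"cuda" on NVIDIA GPUs, so tensors created with device=_DEVICE land on whichever
accelerator is present. For tests that additionally depend on NVIDIA compute
capability, the series converges on stacking a generic accelerator check with
an sm-version gate that only applies when the accelerator is CUDA (test name
below is illustrative):

    @unittest.skipIf(not torch.accelerator.is_available(), "Need GPU available")
    @unittest.skipIf(
        _DEVICE == "cuda" and not is_sm_at_least_89(),
        "Requires GPU with compute capability >= 8.9",
    )
    def test_example(self):
        ...

Tests that exercise CUDA-only kernel paths (e.g. test_mm_float8dq_per_row and
test_expected_kernels_on_gpu) instead keep the plain torch.cuda.is_available()
guard, which is why patches 02 and 04-06 revert some of the broader
torch.accelerator checks introduced earlier in the series.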