From a03e6cf9cae1ec97021ef57e6580e53a05de3374 Mon Sep 17 00:00:00 2001
From: Github Executorch
Date: Sun, 2 Nov 2025 22:20:48 -0800
Subject: [PATCH] ARM backend: Add skip markers for known 16A8W quantization
 test failures

Summary:
Add skip support to the test parametrize framework, since buck-based test
targets do not appear to honor the xfail marker, to handle known failing
16A8W quantization tests in the ARM backend. The model_linear_rank4_zeros,
model_linear_rank4_negative_ones, and model_linear_rank4_negative_large_rand
test cases are now skipped because of bias quantization accuracy issues
tracked in MLETORCH-1452. This keeps buck-based CI unblocked while
preserving visibility of the known issues through explicit test annotations.

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 backends/arm/test/common.py          |  6 ++++++
 backends/arm/test/ops/test_linear.py | 10 ++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py
index 9fd85b5f283..b9dc9b00725 100644
--- a/backends/arm/test/common.py
+++ b/backends/arm/test/common.py
@@ -237,6 +237,7 @@ def parametrize(
     arg_name: str,
     test_data: dict[str, Any],
     xfails: dict[str, xfail_type] | None = None,
+    skips: dict[str, str] | None = None,
     strict: bool = True,
     flakies: dict[str, int] | None = None,
 ) -> Decorator:
@@ -249,6 +250,8 @@
     """
     if xfails is None:
         xfails = {}
+    if skips is None:
+        skips = {}
     if flakies is None:
         flakies = {}
 
@@ -259,6 +262,9 @@ def decorator_func(func: Callable[_P, _R]) -> Callable[_P, _R]:
             if id in flakies:
                 # Mark this parameter as flaky with given reruns
                 marker = (pytest.mark.flaky(reruns=flakies[id]),)
+            elif id in skips:
+                # xfail markers do not work with buck-based CI, so use skip instead
+                marker = (pytest.mark.skip(reason=skips[id]),)
             elif id in xfails:
                 xfail_info = xfails[id]
                 reason = ""
diff --git a/backends/arm/test/ops/test_linear.py b/backends/arm/test/ops/test_linear.py
index 952befeeffa..b63d7c10b34 100644
--- a/backends/arm/test/ops/test_linear.py
+++ b/backends/arm/test/ops/test_linear.py
@@ -308,18 +308,24 @@ def test_linear_16a8w_tosa_INT(test_data: torch.Tensor):
 
 
 x_fails = {}
+x_skips = {}
+
 for test_name in [
     "model_linear_rank4_zeros",
     "model_linear_rank4_negative_ones",
     "model_linear_rank4_negative_large_rand",
 ]:
     for set_per_chan in ["True", "False"]:
-        x_fails[test_name + ",per_channel_quant={}".format(set_per_chan)] = (
+        key = test_name + ",per_channel_quant={}".format(set_per_chan)
+        reason = (
             "MLETORCH-1452: AssertionError: Output 0 does not match reference output."
         )
+        x_fails[key] = reason
+        # TODO: Check why xfail doesn't work for this buck target; rely on skip in the interim
+        x_skips[key] = reason
 
 
-@common.parametrize("test_data", test_data_all_16a8w, x_fails)
+@common.parametrize("test_data", test_data_all_16a8w, xfails=x_fails, skips=x_skips)
 @common.XfailIfNoCorstone300
 def test_linear_16a8w_u55_INT16(test_data: torch.Tensor):
     """Test linear operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
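
Illustrative sketch (not part of the patch): the snippet below is a minimal,
self-contained version of the skip-aware parametrize helper and how a test
file would call it, to show the mechanism this change adds. It assumes plain
pytest and deliberately simplifies the real helper in
backends/arm/test/common.py, which also handles flakies, the Decorator type
alias, and richer xfail metadata; the names demo_data and test_demo are
hypothetical.

    from typing import Any, Callable

    import pytest


    def parametrize(
        arg_name: str,
        test_data: dict[str, Any],
        xfails: dict[str, str] | None = None,
        skips: dict[str, str] | None = None,
        strict: bool = True,
    ) -> Callable:
        """Turn a dict of (id, data) pairs into pytest.param entries,
        attaching a skip or xfail marker to known-bad ids."""
        xfails = xfails or {}
        skips = skips or {}

        def decorator_func(func: Callable) -> Callable:
            params = []
            for id, data in test_data.items():
                if id in skips:
                    # skip is honored by buck-based CI runners where xfail is not
                    marks = (pytest.mark.skip(reason=skips[id]),)
                elif id in xfails:
                    marks = (pytest.mark.xfail(reason=xfails[id], strict=strict),)
                else:
                    marks = ()
                params.append(pytest.param(data, id=id, marks=marks))
            return pytest.mark.parametrize(arg_name, params)(func)

        return decorator_func


    demo_data = {"ok": 1, "known_bad": 2}


    # "ok" runs normally; "known_bad" is reported as skipped with the reason,
    # keeping the failure visible in test output without blocking CI.
    @parametrize("x", demo_data, skips={"known_bad": "MLETORCH-1452"})
    def test_demo(x: int):
        assert x == 1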