From 58e71468ab99b1304be534a20da6d018a98d13f0 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Tue, 16 Sep 2025 16:47:15 -0700
Subject: [PATCH 1/4] Update

[ghstack-poisoned]
---
 .../test/suite/generate_markdown_summary.py  | 225 +++++++++++++-----
 backends/test/suite/operators/__init__.py    |   1 +
 backends/test/suite/reporting.py             |   5 +-
 backends/test/suite/runner.py                |   2 +-
 4 files changed, 168 insertions(+), 65 deletions(-)

diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py
index 73da8fba678..9e330ca6626 100644
--- a/backends/test/suite/generate_markdown_summary.py
+++ b/backends/test/suite/generate_markdown_summary.py
@@ -1,44 +1,69 @@
 import argparse
 import csv
+import json
 import sys
 
-#
-# A standalone script to generate a Markdown representation of a test report.
-# This is primarily intended to be used with GitHub actions to generate a nice
-# representation of the test results when looking at the action run.
-#
-# Usage: python executorch/backends/test/suite/generate_markdown_summary.py
-# Markdown is written to stdout.
-#
+from dataclasses import dataclass, field
 
-def escape_for_markdown(text: str) -> str:
+@dataclass
+class ResultCounts:
     """
-    Modify a string to properly display in a markdown table cell.
+    Represents aggregated result counts for each status.
     """
-    if not text:
-        return text
+
+    total: int = 0
+    passes: int = 0
+    fails: int = 0
+    skips: int = 0
+    by_detail: dict[str, int] = field(default_factory=lambda: {})
 
-    # Replace newlines with <br /> tags
-    escaped = text.replace("\n", "<br />")
+    def add_row(self, result_value: str, result_detail: str) -> None:
+        """
+        Update the result counts for the specified row.
+        """
 
-    # Escape backslashes.
-    escaped = escaped.replace("\\", "\\\\")
+        self.total += 1
 
-    # Escape pipe characters that would break table structure
-    escaped = escaped.replace("|", "\\|")
+        if result_value == "Pass":
+            self.passes += 1
+        elif result_value == "Fail":
+            self.fails += 1
+        elif result_value == "Skip":
+            self.skips += 1
+        else:
+            raise RuntimeError(f"Unknown result value {result_value}")
 
-    return escaped
+        if result_detail:
+            if result_detail not in self.by_detail:
+                self.by_detail[result_detail] = 0
+
+            self.by_detail[result_detail] += 1
+
+
+@dataclass
+class AggregatedSummary:
+    """
+    Represents aggregated summary data for the test run.
+    """
+
+    counts: ResultCounts
+    counts_by_params: dict[str, ResultCounts]
+    failed_tests: list[list[str]]
+    header: list[str]
+
+
+#
+# A standalone script to generate a Markdown representation of a test report.
+# This is primarily intended to be used with GitHub actions to generate a nice
+# representation of the test results when looking at the action run.
+#
+# Usage: python executorch/backends/test/suite/generate_markdown_summary.py
+# Markdown is written to stdout.
+#
 
-def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
-    # Print warning if exit code is non-zero
-    if exit_code != 0:
-        print("> [!WARNING]")
-        print(
-            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
-        )
+def aggregate_results(csv_path: str) -> AggregatedSummary:
     with open(csv_path, newline="", encoding="utf-8") as f:
         reader = csv.reader(f)
         rows = list(reader)
@@ -46,24 +71,28 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
     header = rows[0]
     data_rows = rows[1:]
 
-    # Find the Result and Result Detail column indices
-    result_column_index = None
-    result_detail_column_index = None
-    for i, col in enumerate(header):
-        if col.lower() == "result":
-            result_column_index = i
-        elif col.lower() == "result detail":
-            result_detail_column_index = i
+    header_indices_by_name = {n.lower(): i for (i, n) in enumerate(header)}
+    params_column_index = header_indices_by_name.get("params", None)
+    result_column_index = header_indices_by_name["result"]
+    result_detail_column_index = header_indices_by_name["result detail"]
 
     # Count results and prepare data
-    pass_count = 0
-    fail_count = 0
-    skip_count = 0
+    counts = ResultCounts()
     failed_tests = []
-    processed_rows = []
-    result_detail_counts = {}
+    counts_by_param = {}
 
     for row in data_rows:
+        result = row[result_column_index]
+        result_detail = row[result_detail_column_index]
+
+        counts.add_row(result, result_detail)
+
+        params = row[params_column_index] if params_column_index is not None else None
+        if params:
+            if params not in counts_by_param:
+                counts_by_param[params] = ResultCounts()
+            counts_by_param[params].add_row(result, result_detail)
+
         # Make a copy of the row to avoid modifying the original
         processed_row = [escape_for_markdown(cell) for cell in row]
 
@@ -71,54 +100,124 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
         if result_column_index is not None and result_column_index < len(row):
            result_value = row[result_column_index].strip().lower()
            if result_value == "pass":
-                pass_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:green;">Pass</span>'
                 )
             elif result_value == "fail":
-                fail_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:red;">Fail</span>'
                 )
                 failed_tests.append(processed_row.copy())
             elif result_value == "skip":
-                skip_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:orange;">Skip</span>'
                 )
 
-            # Count result details (excluding empty ones)
-            if result_detail_column_index is not None and result_detail_column_index < len(
-                row
-            ):
-                result_detail_value = row[result_detail_column_index].strip()
-                if result_detail_value:  # Only count non-empty result details
-                    if result_detail_value in result_detail_counts:
-                        result_detail_counts[result_detail_value] += 1
-                    else:
-                        result_detail_counts[result_detail_value] = 1
+    return AggregatedSummary(
+        counts=counts,
+        failed_tests=failed_tests,
+        counts_by_params=counts_by_param,
+        header=header,
+    )
+
+
+def escape_for_markdown(text: str) -> str:
+    """
+    Modify a string to properly display in a markdown table cell.
+    """
+    if not text:
+        return text
+
+    # Replace newlines with <br /> tags
+    escaped = text.replace("\n", "<br />")
+
+    # Escape backslashes.
+    escaped = escaped.replace("\\", "\\\\")
+
+    # Escape pipe characters that would break table structure
+    escaped = escaped.replace("|", "\\|")
+
+    return escaped
+
+
+def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
+    # Print warning if exit code is non-zero
+    if exit_code != 0:
+        print("> [!WARNING]")
+        print(
+            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
+        )
 
-            processed_rows.append(processed_row)
+    results = aggregate_results(csv_path)
 
     # Generate Summary section
-    total_rows = len(data_rows)
+    total_rows = results.counts.total
     print("# Summary\n")
-    print(f"- **Pass**: {pass_count}/{total_rows}")
-    print(f"- **Fail**: {fail_count}/{total_rows}")
-    print(f"- **Skip**: {skip_count}/{total_rows}")
+    print(f"- **Pass**: {results.counts.passes}/{total_rows}")
+    print(f"- **Fail**: {results.counts.fails}/{total_rows}")
+    print(f"- **Skip**: {results.counts.skips}/{total_rows}")
+
+    if results.counts_by_params:
+        print("\n## Results by Parameters\n")
+
+        # Extract all unique parameter keys from the JSON strings
+        all_param_keys = set()
+        parsed_params = {}
+
+        for params_str in results.counts_by_params.keys():
+            # Parse the JSON string (it's a string representation of a dict)
+            params_dict = json.loads(params_str)
+            parsed_params[params_str] = params_dict
+            all_param_keys.update(params_dict.keys())
+
+        if parsed_params:
+            # Sort parameter keys for consistent column ordering
+            sorted_param_keys = sorted(all_param_keys)
+
+            # Create table header
+            header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
+            print("| " + " | ".join(header_cols) + " |")
+            print("|" + "|".join(["---"] * len(header_cols)) + "|")
+
+            # Create table rows
+            for params_str, counts in results.counts_by_params.items():
+                if params_str in parsed_params:
+                    params_dict = parsed_params[params_str]
+                    row_values = []
+
+                    # Add parameter values
+                    for key in sorted_param_keys:
+                        value = params_dict.get(key, "")
+                        row_values.append(str(value))
+
+                    pass_fraction = counts.passes / (counts.passes + counts.fails)
+
+                    # Add count values
+                    row_values.extend(
+                        [
+                            str(counts.passes),
+                            str(counts.fails),
+                            str(counts.skips),
+                            f"{pass_fraction*100:.2f}%",
+                        ]
+                    )
+
+                    print("| " + " | ".join(row_values) + " |")
+
+        print()
 
     print("## Failure Breakdown:")
-    total_rows_with_result_detail = sum(result_detail_counts.values())
-    for detail, count in sorted(result_detail_counts.items()):
+    total_rows_with_result_detail = sum(results.counts.by_detail.values())
+    for detail, count in sorted(results.counts.by_detail.items()):
         print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")
 
     # Generate Failed Tests section
     print("# Failed Tests\n")
-    if failed_tests:
-        escaped_header = [escape_for_markdown(col) for col in header]
+    if results.failed_tests:
+        escaped_header = [escape_for_markdown(col) for col in results.header]
         print("| " + " | ".join(escaped_header) + " |")
-        print("|" + "|".join(["---"] * len(header)) + "|")
-        for row in failed_tests:
+        print("|" + "|".join(["---"] * len(results.header)) + "|")
+        for row in results.failed_tests:
             print("| " + " | ".join(row) + " |")
     else:
         print("No failed tests.\n")
diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py
index 9c550b3a49c..9a40d37e46e 100644
--- a/backends/test/suite/operators/__init__.py
+++ b/backends/test/suite/operators/__init__.py
@@ -171,6 +171,7 @@ def _test_op(
     if run_summary.result.is_backend_failure():
         raise RuntimeError("Test failure.") from run_summary.error
     else:
+        raise RuntimeError("Test: " + str(run_summary))
         # Non-backend failure indicates a bad test. Mark as skipped.
         raise unittest.SkipTest(
             f"Test failed for reasons other than backend failure. Error: {run_summary.error}"
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
index cdf2ce870e1..09e950ab672 100644
--- a/backends/test/suite/reporting.py
+++ b/backends/test/suite/reporting.py
@@ -1,4 +1,5 @@
 import csv
+import json
 
 from collections import Counter
 from dataclasses import dataclass, field
@@ -343,7 +344,9 @@ def _sum_op_counts(counter: Counter | None) -> int | None:
 
 def _serialize_params(params: dict[str, Any] | None) -> str:
     if params is not None:
-        return str(dict(sorted(params.items())))
+        # Convert values to strings - JSON conversion doesn't like dtypes.
+        str_params = {k: str(v) for k, v in params.items()}
+        return json.dumps(str_params)
     else:
         return ""
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
index eeea09e0fc1..a6d7d07bce0 100644
--- a/backends/test/suite/runner.py
+++ b/backends/test/suite/runner.py
@@ -57,7 +57,7 @@ def _graph_has_unsupported_patterns(program: torch.export.ExportedProgram) -> bool:
             and node.target == exir_ops.edge.aten.convolution.default
         ):
             in_rank = node.args[0].meta["val"].dim()
-            if in_rank != 4:
+            if in_rank > 4:
                 return True
     return False

From c1c4ccdffdd9b6ce991c72d8cf1145c10f1ab037 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Tue, 16 Sep 2025 16:50:11 -0700
Subject: [PATCH 2/4] Update

[ghstack-poisoned]
---
 backends/test/suite/operators/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py
index 9a40d37e46e..9c550b3a49c 100644
--- a/backends/test/suite/operators/__init__.py
+++ b/backends/test/suite/operators/__init__.py
@@ -171,7 +171,6 @@ def _test_op(
     if run_summary.result.is_backend_failure():
         raise RuntimeError("Test failure.") from run_summary.error
     else:
-        raise RuntimeError("Test: " + str(run_summary))
         # Non-backend failure indicates a bad test. Mark as skipped.
         raise unittest.SkipTest(
             f"Test failed for reasons other than backend failure. Error: {run_summary.error}"
From 23f72864f2dc16cf9e8f6679df0f25fd39bd00e7 Mon Sep 17 00:00:00 2001
From: Gregory Comer
Date: Tue, 16 Sep 2025 18:54:25 -0700
Subject: [PATCH 3/4] Update

[ghstack-poisoned]
---
 backends/test/suite/tests/test_reporting.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py
index 58ff76cba17..e42681fc678 100644
--- a/backends/test/suite/tests/test_reporting.py
+++ b/backends/test/suite/tests/test_reporting.py
@@ -1,3 +1,4 @@
+import json
 import unittest
 
 from csv import DictReader
@@ -102,14 +103,16 @@ def test_csv_report_simple(self):
         self.assertEqual(records[2]["Test Case"], "test2")
         self.assertEqual(records[2]["Flow"], "flow1")
         self.assertEqual(records[2]["Result"], "Pass")
-        self.assertEqual(records[2]["Params"], str({"dtype": torch.float32}))
+        self.assertEqual(records[2]["Params"], json.dumps({"dtype": "torch.float32"}))
 
         # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
         self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
         self.assertEqual(records[3]["Test Case"], "test2")
         self.assertEqual(records[3]["Flow"], "flow1")
         self.assertEqual(records[3]["Result"], "Skip")
-        self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True}))
+        self.assertEqual(
+            records[3]["Params"], json.dumps({"use_dynamic_shapes": "True"})
+        )

From 9fd2133caa5c26767aaad0da5d080ac54aa20b97 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Wed, 17 Sep 2025 11:09:30 -0700
Subject: [PATCH 4/4] Update

[ghstack-poisoned]
---
 .github/workflows/test-backend-arm.yml           |  2 ++
 .github/workflows/test-backend-coreml.yml        |  2 ++
 .github/workflows/test-backend-qnn.yml           |  2 ++
 .github/workflows/test-backend-vulkan.yml        |  2 ++
 .github/workflows/test-backend-xnnpack.yml       |  2 ++
 backends/test/suite/generate_markdown_summary.py | 16 +++++++++++-----
 6 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-backend-arm.yml b/.github/workflows/test-backend-arm.yml
index e57be2704a2..bee74fee172 100644
--- a/.github/workflows/test-backend-arm.yml
+++ b/.github/workflows/test-backend-arm.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-coreml.yml b/.github/workflows/test-backend-coreml.yml
index c6970ddff61..247f9576595 100644
--- a/.github/workflows/test-backend-coreml.yml
+++ b/.github/workflows/test-backend-coreml.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-qnn.yml b/.github/workflows/test-backend-qnn.yml
index 00933d6c74e..907c4d2dac0 100644
--- a/.github/workflows/test-backend-qnn.yml
+++ b/.github/workflows/test-backend-qnn.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-vulkan.yml b/.github/workflows/test-backend-vulkan.yml
index f04fdcdd1f1..cb2478fc825 100644
--- a/.github/workflows/test-backend-vulkan.yml
+++ b/.github/workflows/test-backend-vulkan.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-xnnpack.yml b/.github/workflows/test-backend-xnnpack.yml
index 2ae423dd99b..086c9625a38 100644
--- a/.github/workflows/test-backend-xnnpack.yml
+++ b/.github/workflows/test-backend-xnnpack.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py
index 9e330ca6626..e54fc691723 100644
--- a/backends/test/suite/generate_markdown_summary.py
+++ b/backends/test/suite/generate_markdown_summary.py
@@ -151,11 +151,17 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
     results = aggregate_results(csv_path)
 
     # Generate Summary section
-    total_rows = results.counts.total
     print("# Summary\n")
-    print(f"- **Pass**: {results.counts.passes}/{total_rows}")
-    print(f"- **Fail**: {results.counts.fails}/{total_rows}")
-    print(f"- **Skip**: {results.counts.skips}/{total_rows}")
+    total_excluding_skips = results.counts.passes + results.counts.fails
+    pass_fraction = results.counts.passes / total_excluding_skips
+    fail_fraction = results.counts.fails / total_excluding_skips
+    print(
+        f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
+    )
+    print(
+        f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
+    )
+    print(f"- **Skip**: {results.counts.skips}")
 
     if results.counts_by_params:
         print("\n## Results by Parameters\n")
@@ -170,7 +176,7 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
             parsed_params[params_str] = params_dict
             all_param_keys.update(params_dict.keys())
 
-        if parsed_params:
+        if parsed_params and len(parsed_params) > 1:
             # Sort parameter keys for consistent column ordering
             sorted_param_keys = sorted(all_param_keys)
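
Note on how the pieces above fit together: after patch 1, _serialize_params writes each test's params into the CSV as JSON with stringified values (torch dtypes are not JSON-serializable), and aggregate_results parses that JSON back with json.loads when building the per-parameter table. A minimal round-trip sketch follows, using a simplified stand-in for the patched helper; the dtype and use_dynamic_shapes values mirror the test fixtures in patch 3.

import json


def serialize_params(params: dict | None) -> str:
    # Simplified stand-in for _serialize_params after patch 1: values are
    # stringified first because json.dumps cannot encode torch dtypes.
    if params is None:
        return ""
    return json.dumps({k: str(v) for k, v in params.items()})


# What a "Params" CSV cell would contain for a parameterized test:
cell = serialize_params({"dtype": "torch.float32", "use_dynamic_shapes": True})
assert cell == '{"dtype": "torch.float32", "use_dynamic_shapes": "True"}'

# aggregate_results recovers the dict for the "Results by Parameters" table;
# note that every value comes back as a string, including booleans.
parsed = json.loads(cell)
assert parsed["use_dynamic_shapes"] == "True"

This round-trip is also why patch 3 updates the test expectations from str({...}) to json.dumps({...}) with string-valued entries.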