From 58e71468ab99b1304be534a20da6d018a98d13f0 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Tue, 16 Sep 2025 16:47:15 -0700
Subject: [PATCH 1/4] Update

[ghstack-poisoned]
---
 .../test/suite/generate_markdown_summary.py  | 225 +++++++++++++-----
 backends/test/suite/operators/__init__.py    |   1 +
 backends/test/suite/reporting.py             |   5 +-
 backends/test/suite/runner.py                |   2 +-
 4 files changed, 168 insertions(+), 65 deletions(-)

diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py
index 73da8fba678..9e330ca6626 100644
--- a/backends/test/suite/generate_markdown_summary.py
+++ b/backends/test/suite/generate_markdown_summary.py
@@ -1,44 +1,69 @@
 import argparse
 import csv
+import json
 import sys
 
-#
-# A standalone script to generate a Markdown representation of a test report.
-# This is primarily intended to be used with GitHub actions to generate a nice
-# representation of the test results when looking at the action run.
-#
-# Usage: python executorch/backends/test/suite/generate_markdown_summary.py
-# Markdown is written to stdout.
-#
+from dataclasses import dataclass, field
 
-def escape_for_markdown(text: str) -> str:
+@dataclass
+class ResultCounts:
     """
-    Modify a string to properly display in a markdown table cell.
+    Represents aggregated result counts for each status.
     """
-    if not text:
-        return text
+
+    total: int = 0
+    passes: int = 0
+    fails: int = 0
+    skips: int = 0
+    by_detail: dict[str, int] = field(default_factory=lambda: {})
 
-    # Replace newlines with <br /> tags
-    escaped = text.replace("\n", "<br />")
+    def add_row(self, result_value: str, result_detail: str) -> None:
+        """
+        Update the result counts for the specified row.
+        """
 
-    # Escape backslashes.
-    escaped = escaped.replace("\\", "\\\\")
+        self.total += 1
 
-    # Escape pipe characters that would break table structure
-    escaped = escaped.replace("|", "\\|")
+        if result_value == "Pass":
+            self.passes += 1
+        elif result_value == "Fail":
+            self.fails += 1
+        elif result_value == "Skip":
+            self.skips += 1
+        else:
+            raise RuntimeError(f"Unknown result value {result_value}")
 
-    return escaped
+        if result_detail:
+            if result_detail not in self.by_detail:
+                self.by_detail[result_detail] = 0
+
+            self.by_detail[result_detail] += 1
+
+
+@dataclass
+class AggregatedSummary:
+    """
+    Represents aggregated summary data for the test run.
+    """
+
+    counts: ResultCounts
+    counts_by_params: dict[str, ResultCounts]
+    failed_tests: list[list[str]]
+    header: list[str]
+
+
+#
+# A standalone script to generate a Markdown representation of a test report.
+# This is primarily intended to be used with GitHub actions to generate a nice
+# representation of the test results when looking at the action run.
+#
+# Usage: python executorch/backends/test/suite/generate_markdown_summary.py
+# Markdown is written to stdout.
+#
 
-def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
-    # Print warning if exit code is non-zero
-    if exit_code != 0:
-        print("> [!WARNING]")
-        print(
-            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
-        )
+def aggregate_results(csv_path: str) -> AggregatedSummary:
     with open(csv_path, newline="", encoding="utf-8") as f:
         reader = csv.reader(f)
         rows = list(reader)
@@ -46,24 +71,28 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
     header = rows[0]
     data_rows = rows[1:]
 
-    # Find the Result and Result Detail column indices
-    result_column_index = None
-    result_detail_column_index = None
-    for i, col in enumerate(header):
-        if col.lower() == "result":
-            result_column_index = i
-        elif col.lower() == "result detail":
-            result_detail_column_index = i
+    header_indices_by_name = {n.lower(): i for (i, n) in enumerate(header)}
+    params_column_index = header_indices_by_name.get("params", None)
+    result_column_index = header_indices_by_name["result"]
+    result_detail_column_index = header_indices_by_name["result detail"]
 
     # Count results and prepare data
-    pass_count = 0
-    fail_count = 0
-    skip_count = 0
+    counts = ResultCounts()
     failed_tests = []
-    processed_rows = []
-    result_detail_counts = {}
+    counts_by_param = {}
 
     for row in data_rows:
+        result = row[result_column_index]
+        result_detail = row[result_detail_column_index]
+
+        counts.add_row(result, result_detail)
+
+        params = row[params_column_index] if params_column_index is not None else None
+        if params:
+            if params not in counts_by_param:
+                counts_by_param[params] = ResultCounts()
+            counts_by_param[params].add_row(result, result_detail)
+
         # Make a copy of the row to avoid modifying the original
         processed_row = [escape_for_markdown(cell) for cell in row]
 
@@ -71,54 +100,124 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
         if result_column_index is not None and result_column_index < len(row):
            result_value = row[result_column_index].strip().lower()
            if result_value == "pass":
-                pass_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:green;">Pass</span>'
                 )
             elif result_value == "fail":
-                fail_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:red;">Fail</span>'
                 )
                 failed_tests.append(processed_row.copy())
             elif result_value == "skip":
-                skip_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:orange;">Skip</span>'
                 )
 
-            # Count result details (excluding empty ones)
-            if result_detail_column_index is not None and result_detail_column_index < len(
-                row
-            ):
-                result_detail_value = row[result_detail_column_index].strip()
-                if result_detail_value:  # Only count non-empty result details
-                    if result_detail_value in result_detail_counts:
-                        result_detail_counts[result_detail_value] += 1
-                    else:
-                        result_detail_counts[result_detail_value] = 1
+    return AggregatedSummary(
+        counts=counts,
+        failed_tests=failed_tests,
+        counts_by_params=counts_by_param,
+        header=header,
+    )
+
+
+def escape_for_markdown(text: str) -> str:
+    """
+    Modify a string to properly display in a markdown table cell.
+    """
+    if not text:
+        return text
+
+    # Replace newlines with <br /> tags
+    escaped = text.replace("\n", "<br />")
+
+    # Escape backslashes.
+    escaped = escaped.replace("\\", "\\\\")
+
+    # Escape pipe characters that would break table structure
+    escaped = escaped.replace("|", "\\|")
+
+    return escaped
+
+
+def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
+    # Print warning if exit code is non-zero
+    if exit_code != 0:
+        print("> [!WARNING]")
+        print(
+            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
+        )
 
-            processed_rows.append(processed_row)
+    results = aggregate_results(csv_path)
 
     # Generate Summary section
-    total_rows = len(data_rows)
+    total_rows = results.counts.total
     print("# Summary\n")
-    print(f"- **Pass**: {pass_count}/{total_rows}")
-    print(f"- **Fail**: {fail_count}/{total_rows}")
-    print(f"- **Skip**: {skip_count}/{total_rows}")
+    print(f"- **Pass**: {results.counts.passes}/{total_rows}")
+    print(f"- **Fail**: {results.counts.fails}/{total_rows}")
+    print(f"- **Skip**: {results.counts.skips}/{total_rows}")
+
+    if results.counts_by_params:
+        print("\n## Results by Parameters\n")
+
+        # Extract all unique parameter keys from the JSON strings
+        all_param_keys = set()
+        parsed_params = {}
+
+        for params_str in results.counts_by_params.keys():
+            # Parse the JSON string (it's a string representation of a dict)
+            params_dict = json.loads(params_str)
+            parsed_params[params_str] = params_dict
+            all_param_keys.update(params_dict.keys())
+
+        if parsed_params:
+            # Sort parameter keys for consistent column ordering
+            sorted_param_keys = sorted(all_param_keys)
+
+            # Create table header
+            header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
+            print("| " + " | ".join(header_cols) + " |")
+            print("|" + "|".join(["---"] * len(header_cols)) + "|")
+
+            # Create table rows
+            for params_str, counts in results.counts_by_params.items():
+                if params_str in parsed_params:
+                    params_dict = parsed_params[params_str]
+                    row_values = []
+
+                    # Add parameter values
+                    for key in sorted_param_keys:
+                        value = params_dict.get(key, "")
+                        row_values.append(str(value))
+
+                    pass_fraction = counts.passes / (counts.passes + counts.fails)
+
+                    # Add count values
+                    row_values.extend(
+                        [
+                            str(counts.passes),
+                            str(counts.fails),
+                            str(counts.skips),
+                            f"{pass_fraction*100:.2f}%",
+                        ]
+                    )
+
+                    print("| " + " | ".join(row_values) + " |")
+
+        print()
 
     print("## Failure Breakdown:")
-    total_rows_with_result_detail = sum(result_detail_counts.values())
-    for detail, count in sorted(result_detail_counts.items()):
+    total_rows_with_result_detail = sum(results.counts.by_detail.values())
+    for detail, count in sorted(results.counts.by_detail.items()):
         print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")
 
     # Generate Failed Tests section
     print("# Failed Tests\n")
-    if failed_tests:
-        escaped_header = [escape_for_markdown(col) for col in header]
+    if results.failed_tests:
+        escaped_header = [escape_for_markdown(col) for col in results.header]
         print("| " + " | ".join(escaped_header) + " |")
-        print("|" + "|".join(["---"] * len(header)) + "|")
-        for row in failed_tests:
+        print("|" + "|".join(["---"] * len(results.header)) + "|")
+        for row in results.failed_tests:
             print("| " + " | ".join(row) + " |")
     else:
         print("No failed tests.\n")
diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py
index 9c550b3a49c..9a40d37e46e 100644
--- a/backends/test/suite/operators/__init__.py
+++ b/backends/test/suite/operators/__init__.py
@@ -171,6 +171,7 @@ def _test_op(
     if run_summary.result.is_backend_failure():
         raise RuntimeError("Test failure.") from run_summary.error
     else:
+        raise RuntimeError("Test: " + str(run_summary))
         # Non-backend failure indicates a bad test. Mark as skipped.
         raise unittest.SkipTest(
             f"Test failed for reasons other than backend failure. Error: {run_summary.error}"
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
index cdf2ce870e1..09e950ab672 100644
--- a/backends/test/suite/reporting.py
+++ b/backends/test/suite/reporting.py
@@ -1,4 +1,5 @@
 import csv
+import json
 
 from collections import Counter
 from dataclasses import dataclass, field
@@ -343,7 +344,9 @@ def _sum_op_counts(counter: Counter | None) -> int | None:
 
 def _serialize_params(params: dict[str, Any] | None) -> str:
     if params is not None:
-        return str(dict(sorted(params.items())))
+        # Convert values to strings - JSON conversion doesn't like dtypes.
+        str_params = {k: str(v) for k, v in params.items()}
+        return json.dumps(str_params)
     else:
         return ""
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
index eeea09e0fc1..a6d7d07bce0 100644
--- a/backends/test/suite/runner.py
+++ b/backends/test/suite/runner.py
@@ -57,7 +57,7 @@ def _graph_has_unsupported_patterns(program: torch.export.ExportedProgram) -> bool:
             and node.target == exir_ops.edge.aten.convolution.default
         ):
             in_rank = node.args[0].meta["val"].dim()
-            if in_rank != 4:
+            if in_rank > 4:
                 return True
     return False

From c1c4ccdffdd9b6ce991c72d8cf1145c10f1ab037 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Tue, 16 Sep 2025 16:50:11 -0700
Subject: [PATCH 2/4] Update

[ghstack-poisoned]
---
 backends/test/suite/operators/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py
index 9a40d37e46e..9c550b3a49c 100644
--- a/backends/test/suite/operators/__init__.py
+++ b/backends/test/suite/operators/__init__.py
@@ -171,7 +171,6 @@ def _test_op(
     if run_summary.result.is_backend_failure():
         raise RuntimeError("Test failure.") from run_summary.error
     else:
-        raise RuntimeError("Test: " + str(run_summary))
         # Non-backend failure indicates a bad test. Mark as skipped.
         raise unittest.SkipTest(
             f"Test failed for reasons other than backend failure. Error: {run_summary.error}"
From 23f72864f2dc16cf9e8f6679df0f25fd39bd00e7 Mon Sep 17 00:00:00 2001
From: Gregory Comer
Date: Tue, 16 Sep 2025 18:54:25 -0700
Subject: [PATCH 3/4] Update

[ghstack-poisoned]
---
 backends/test/suite/tests/test_reporting.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py
index 58ff76cba17..e42681fc678 100644
--- a/backends/test/suite/tests/test_reporting.py
+++ b/backends/test/suite/tests/test_reporting.py
@@ -1,3 +1,4 @@
+import json
 import unittest
 
 from csv import DictReader
@@ -102,14 +103,16 @@ def test_csv_report_simple(self):
         self.assertEqual(records[2]["Test Case"], "test2")
         self.assertEqual(records[2]["Flow"], "flow1")
         self.assertEqual(records[2]["Result"], "Pass")
-        self.assertEqual(records[2]["Params"], str({"dtype": torch.float32}))
+        self.assertEqual(records[2]["Params"], json.dumps({"dtype": "torch.float32"}))
 
         # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
         self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
         self.assertEqual(records[3]["Test Case"], "test2")
         self.assertEqual(records[3]["Flow"], "flow1")
         self.assertEqual(records[3]["Result"], "Skip")
-        self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True}))
+        self.assertEqual(
+            records[3]["Params"], json.dumps({"use_dynamic_shapes": "True"})
+        )

From 9fd2133caa5c26767aaad0da5d080ac54aa20b97 Mon Sep 17 00:00:00 2001
From: Gregory James Comer
Date: Wed, 17 Sep 2025 11:09:30 -0700
Subject: [PATCH 4/4] Update

[ghstack-poisoned]
---
 .github/workflows/test-backend-arm.yml           |  2 ++
 .github/workflows/test-backend-coreml.yml        |  2 ++
 .github/workflows/test-backend-qnn.yml           |  2 ++
 .github/workflows/test-backend-vulkan.yml        |  2 ++
 .github/workflows/test-backend-xnnpack.yml       |  2 ++
 backends/test/suite/generate_markdown_summary.py | 16 +++++++++++-----
 6 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/test-backend-arm.yml b/.github/workflows/test-backend-arm.yml
index e57be2704a2..bee74fee172 100644
--- a/.github/workflows/test-backend-arm.yml
+++ b/.github/workflows/test-backend-arm.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-coreml.yml b/.github/workflows/test-backend-coreml.yml
index c6970ddff61..247f9576595 100644
--- a/.github/workflows/test-backend-coreml.yml
+++ b/.github/workflows/test-backend-coreml.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-qnn.yml b/.github/workflows/test-backend-qnn.yml
index 00933d6c74e..907c4d2dac0 100644
--- a/.github/workflows/test-backend-qnn.yml
+++ b/.github/workflows/test-backend-qnn.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-vulkan.yml b/.github/workflows/test-backend-vulkan.yml
index f04fdcdd1f1..cb2478fc825 100644
--- a/.github/workflows/test-backend-vulkan.yml
+++ b/.github/workflows/test-backend-vulkan.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/.github/workflows/test-backend-xnnpack.yml b/.github/workflows/test-backend-xnnpack.yml
index 2ae423dd99b..086c9625a38 100644
--- a/.github/workflows/test-backend-xnnpack.yml
+++ b/.github/workflows/test-backend-xnnpack.yml
@@ -4,6 +4,8 @@ on:
   schedule:
     - cron: 0 2 * * *
   push:
+    branches:
+      - release/*
     tags:
       - ciflow/nightly/*
   pull_request:
diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py
index 9e330ca6626..e54fc691723 100644
--- a/backends/test/suite/generate_markdown_summary.py
+++ b/backends/test/suite/generate_markdown_summary.py
@@ -151,11 +151,17 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
     results = aggregate_results(csv_path)
 
     # Generate Summary section
-    total_rows = results.counts.total
     print("# Summary\n")
-    print(f"- **Pass**: {results.counts.passes}/{total_rows}")
-    print(f"- **Fail**: {results.counts.fails}/{total_rows}")
-    print(f"- **Skip**: {results.counts.skips}/{total_rows}")
+    total_excluding_skips = results.counts.passes + results.counts.fails
+    pass_fraction = results.counts.passes / total_excluding_skips
+    fail_fraction = results.counts.fails / total_excluding_skips
+    print(
+        f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
+    )
+    print(
+        f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
+    )
+    print(f"- **Skip**: {results.counts.skips}")
 
     if results.counts_by_params:
         print("\n## Results by Parameters\n")
@@ -170,7 +176,7 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
             parsed_params[params_str] = params_dict
             all_param_keys.update(params_dict.keys())
 
-        if parsed_params:
+        if parsed_params and len(parsed_params) > 1:
             # Sort parameter keys for consistent column ordering
             sorted_param_keys = sorted(all_param_keys)
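
Note on how the pieces above fit together: after patch 1, _serialize_params writes each test's params into the CSV as JSON with stringified values (torch dtypes are not JSON-serializable), and aggregate_results parses that JSON back with json.loads when building the per-parameter table. A minimal round-trip sketch follows, using a simplified stand-in for the patched helper; the dtype and use_dynamic_shapes values mirror the test fixtures in patch 3.

import json


def serialize_params(params: dict | None) -> str:
    # Simplified stand-in for _serialize_params after patch 1: values are
    # stringified first because json.dumps cannot encode torch dtypes.
    if params is None:
        return ""
    return json.dumps({k: str(v) for k, v in params.items()})


# What a "Params" CSV cell would contain for a parameterized test:
cell = serialize_params({"dtype": "torch.float32", "use_dynamic_shapes": True})
assert cell == '{"dtype": "torch.float32", "use_dynamic_shapes": "True"}'

# aggregate_results recovers the dict for the "Results by Parameters" table;
# note that every value comes back as a string, including booleans.
parsed = json.loads(cell)
assert parsed["use_dynamic_shapes"] == "True"

This round-trip is also why patch 3 updates the test expectations from str({...}) to json.dumps({...}) with string-valued entries.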