2 changes: 2 additions & 0 deletions .github/workflows/test-backend-arm.yml
@@ -4,6 +4,8 @@ on:
schedule:
- cron: 0 2 * * *
push:
branches:
- release/*
tags:
- ciflow/nightly/*
pull_request:
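The same two-line addition is applied to all five backend test workflows (arm, coreml, qnn, vulkan, xnnpack — shown below): pushes to release/* branches now trigger the backend test suites alongside the nightly cron and ciflow/nightly tags. A sketch of the resulting trigger block, reconstructed from the hunk above (lines outside the hunk are assumed):

```yaml
on:
  schedule:
    - cron: 0 2 * * *
  push:
    branches:
      - release/*        # new: run backend tests on pushes to release branches
    tags:
      - ciflow/nightly/*
  pull_request:
    # ... (remaining triggers are outside the diff hunk)
```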
2 changes: 2 additions & 0 deletions .github/workflows/test-backend-coreml.yml
@@ -4,6 +4,8 @@ on:
schedule:
- cron: 0 2 * * *
push:
branches:
- release/*
tags:
- ciflow/nightly/*
pull_request:
2 changes: 2 additions & 0 deletions .github/workflows/test-backend-qnn.yml
@@ -4,6 +4,8 @@ on:
schedule:
- cron: 0 2 * * *
push:
branches:
- release/*
tags:
- ciflow/nightly/*
pull_request:
2 changes: 2 additions & 0 deletions .github/workflows/test-backend-vulkan.yml
@@ -4,6 +4,8 @@ on:
schedule:
- cron: 0 2 * * *
push:
branches:
- release/*
tags:
- ciflow/nightly/*
pull_request:
2 changes: 2 additions & 0 deletions .github/workflows/test-backend-xnnpack.yml
@@ -4,6 +4,8 @@ on:
schedule:
- cron: 0 2 * * *
push:
branches:
- release/*
tags:
- ciflow/nightly/*
pull_request:
231 changes: 168 additions & 63 deletions backends/test/suite/generate_markdown_summary.py
@@ -1,124 +1,229 @@
import argparse
import csv
import json
import sys

#
# A standalone script to generate a Markdown representation of a test report.
# This is primarily intended to be used with GitHub actions to generate a nice
# representation of the test results when looking at the action run.
#
# Usage: python executorch/backends/test/suite/generate_markdown_summary.py <path to test report CSV file>
# Markdown is written to stdout.
#
from dataclasses import dataclass, field


def escape_for_markdown(text: str) -> str:
@dataclass
class ResultCounts:
"""
Modify a string to properly display in a markdown table cell.
Represents aggregated result counts for each status.
"""
if not text:
return text

# Replace newlines with <br /> tags
escaped = text.replace("\n", "<br />")
total: int = 0
passes: int = 0
fails: int = 0
skips: int = 0
by_detail: dict[str, int] = field(default_factory=lambda: {})

# Escape backslashes.
escaped = escaped.replace("\\", "\\\\")
def add_row(self, result_value: str, result_detail: str) -> None:
"""
Update the result counts for the specified row.
"""

# Escape pipe characters that would break table structure
escaped = escaped.replace("|", "\\|")
self.total += 1

return escaped
if result_value == "Pass":
self.passes += 1
elif result_value == "Fail":
self.fails += 1
elif result_value == "Skip":
self.skips += 1
else:
raise RuntimeError(f"Unknown result value {result_value}")

if result_detail:
if result_detail not in self.by_detail:
self.by_detail[result_detail] = 0

self.by_detail[result_detail] += 1


@dataclass
class AggregatedSummary:
"""
Represents aggregated summary data for the test run.
"""

counts: ResultCounts
counts_by_params: dict[str, ResultCounts]
failed_tests: list[list[str]]
header: list[str]


#
# A standalone script to generate a Markdown representation of a test report.
# This is primarily intended to be used with GitHub actions to generate a nice
# representation of the test results when looking at the action run.
#
# Usage: python executorch/backends/test/suite/generate_markdown_summary.py <path to test report CSV file>
# Markdown is written to stdout.
#

def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
# Print warning if exit code is non-zero
if exit_code != 0:
print("> [!WARNING]")
print(
f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
)

def aggregate_results(csv_path: str) -> AggregatedSummary:
with open(csv_path, newline="", encoding="utf-8") as f:
reader = csv.reader(f)
rows = list(reader)

header = rows[0]
data_rows = rows[1:]

# Find the Result and Result Detail column indices
result_column_index = None
result_detail_column_index = None
for i, col in enumerate(header):
if col.lower() == "result":
result_column_index = i
elif col.lower() == "result detail":
result_detail_column_index = i
header_indices_by_name = {n.lower(): i for (i, n) in enumerate(header)}
params_column_index = header_indices_by_name.get("params", None)
result_column_index = header_indices_by_name["result"]
result_detail_column_index = header_indices_by_name["result detail"]

# Count results and prepare data
pass_count = 0
fail_count = 0
skip_count = 0
counts = ResultCounts()
failed_tests = []
processed_rows = []
result_detail_counts = {}
counts_by_param = {}

for row in data_rows:
result = row[result_column_index]
result_detail = row[result_detail_column_index]

counts.add_row(result, result_detail)

params = row[params_column_index] if params_column_index else None
if params:
if params not in counts_by_param:
counts_by_param[params] = ResultCounts()
counts_by_param[params].add_row(result, result_detail)

# Make a copy of the row to avoid modifying the original
processed_row = [escape_for_markdown(cell) for cell in row]

# Count results and collect failed tests
if result_column_index is not None and result_column_index < len(row):
result_value = row[result_column_index].strip().lower()
if result_value == "pass":
pass_count += 1
processed_row[result_column_index] = (
'<span style="color:green">Pass</span>'
)
elif result_value == "fail":
fail_count += 1
processed_row[result_column_index] = (
'<span style="color:red">Fail</span>'
)
failed_tests.append(processed_row.copy())
elif result_value == "skip":
skip_count += 1
processed_row[result_column_index] = (
'<span style="color:gray">Skip</span>'
)

# Count result details (excluding empty ones)
if result_detail_column_index is not None and result_detail_column_index < len(
row
):
result_detail_value = row[result_detail_column_index].strip()
if result_detail_value: # Only count non-empty result details
if result_detail_value in result_detail_counts:
result_detail_counts[result_detail_value] += 1
else:
result_detail_counts[result_detail_value] = 1
return AggregatedSummary(
counts=counts,
failed_tests=failed_tests,
counts_by_params=counts_by_param,
header=header,
)


def escape_for_markdown(text: str) -> str:
"""
Modify a string to properly display in a markdown table cell.
"""
if not text:
return text

# Replace newlines with <br /> tags
escaped = text.replace("\n", "<br />")

processed_rows.append(processed_row)
# Escape backslashes.
escaped = escaped.replace("\\", "\\\\")

# Escape pipe characters that would break table structure
escaped = escaped.replace("|", "\\|")

return escaped


def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
# Print warning if exit code is non-zero
if exit_code != 0:
print("> [!WARNING]")
print(
f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
)

results = aggregate_results(csv_path)

# Generate Summary section
total_rows = len(data_rows)
print("# Summary\n")
print(f"- **Pass**: {pass_count}/{total_rows}")
print(f"- **Fail**: {fail_count}/{total_rows}")
print(f"- **Skip**: {skip_count}/{total_rows}")
total_excluding_skips = results.counts.passes + results.counts.fails
pass_fraction = results.counts.passes / total_excluding_skips
fail_fraction = results.counts.fails / total_excluding_skips
print(
f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
)
print(
f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
)
print(f"- **Skip**: {results.counts.skips}")

if results.counts_by_params:
print("\n## Results by Parameters\n")

# Extract all unique parameter keys from the JSON strings
all_param_keys = set()
parsed_params = {}

for params_str in results.counts_by_params.keys():
# Parse the JSON string (it's a string representation of a dict)
params_dict = json.loads(params_str)
parsed_params[params_str] = params_dict
all_param_keys.update(params_dict.keys())

if parsed_params and len(parsed_params) > 1:
# Sort parameter keys for consistent column ordering
sorted_param_keys = sorted(all_param_keys)

# Create table header
header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
print("| " + " | ".join(header_cols) + " |")
print("|" + "|".join(["---"] * len(header_cols)) + "|")

# Create table rows
for params_str, counts in results.counts_by_params.items():
if params_str in parsed_params:
params_dict = parsed_params[params_str]
row_values = []

# Add parameter values
for key in sorted_param_keys:
value = params_dict.get(key, "")
row_values.append(str(value))

pass_fraction = counts.passes / (counts.passes + counts.fails)

# Add count values
row_values.extend(
[
str(counts.passes),
str(counts.fails),
str(counts.skips),
f"{pass_fraction*100:.2f}%",
]
)

print("| " + " | ".join(row_values) + " |")

print()

print("## Failure Breakdown:")
total_rows_with_result_detail = sum(result_detail_counts.values())
for detail, count in sorted(result_detail_counts.items()):
total_rows_with_result_detail = sum(results.counts.by_detail.values())
for detail, count in sorted(results.counts.by_detail.items()):
print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")

# Generate Failed Tests section
print("# Failed Tests\n")
if failed_tests:
escaped_header = [escape_for_markdown(col) for col in header]
if results.failed_tests:
escaped_header = [escape_for_markdown(col) for col in results.header]
print("| " + " | ".join(escaped_header) + " |")
print("|" + "|".join(["---"] * len(header)) + "|")
for row in failed_tests:
print("|" + "|".join(["---"] * len(results.header)) + "|")
for row in results.failed_tests:
print("| " + " | ".join(row) + " |")
else:
print("No failed tests.\n")
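Taken together, the rewrite splits the script into an aggregation pass (aggregate_results, returning an AggregatedSummary) and a rendering pass (generate_markdown). A minimal sketch of how the new ResultCounts accumulator behaves, using the definitions from the diff above:

```python
# Minimal sketch; ResultCounts is the dataclass defined in the diff above.
counts = ResultCounts()
counts.add_row("Pass", "")
counts.add_row("Fail", "Lowering error")
counts.add_row("Fail", "Lowering error")
counts.add_row("Skip", "")

assert counts.total == 4
assert (counts.passes, counts.fails, counts.skips) == (1, 2, 1)
assert counts.by_detail == {"Lowering error": 2}

# The summary now reports pass/fail rates excluding skips:
total_excluding_skips = counts.passes + counts.fails
print(f"Pass: {counts.passes}/{total_excluding_skips} "
      f"({counts.passes / total_excluding_skips * 100:.2f}%)")
# -> Pass: 1/3 (33.33%)
```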
5 changes: 4 additions & 1 deletion backends/test/suite/reporting.py
@@ -1,4 +1,5 @@
import csv
import json

from collections import Counter
from dataclasses import dataclass, field
@@ -343,7 +344,9 @@ def _sum_op_counts(counter: Counter | None) -> int | None:

def _serialize_params(params: dict[str, Any] | None) -> str:
if params is not None:
return str(dict(sorted(params.items())))
# Convert values to strings - JSON conversion doesn't like dtypes.
str_params = {k: str(v) for k, v in params.items()}
return json.dumps(str_params)
else:
return ""

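Params are now serialized as JSON rather than via str() on a dict, so downstream consumers (like the markdown summary above) can recover them with json.loads. A self-contained sketch of the new behavior, mirroring the diff:

```python
import json
from typing import Any


def _serialize_params(params: dict[str, Any] | None) -> str:
    if params is not None:
        # Stringify values first - objects like torch.float32 are not JSON-serializable.
        str_params = {k: str(v) for k, v in params.items()}
        return json.dumps(str_params)
    else:
        return ""


# str(torch.float32) == "torch.float32", so a dtype param serializes as its name:
print(_serialize_params({"use_dynamic_shapes": True}))  # {"use_dynamic_shapes": "True"}
print(_serialize_params(None))  # (empty string)
```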
2 changes: 1 addition & 1 deletion backends/test/suite/runner.py
@@ -57,7 +57,7 @@ def _graph_has_unsupported_patterns(program: torch.export.ExportedProgram) -> bool:
and node.target == exir_ops.edge.aten.convolution.default
):
in_rank = node.args[0].meta["val"].dim()
if in_rank != 4:
if in_rank > 4:
return True

return False
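The relaxed check means only convolutions whose input rank exceeds 4 are flagged as unsupported; rank-3 inputs no longer trip the filter. A sketch of the predicate in isolation (the helper name here is hypothetical):

```python
def _conv_rank_unsupported(in_rank: int) -> bool:
    # Before this change: in_rank != 4 (anything but Conv2d-shaped input was flagged).
    # After: only rank-5+ inputs are flagged as unsupported.
    return in_rank > 4


assert not _conv_rank_unsupported(3)  # Conv1d input (N, C, L): now allowed
assert not _conv_rank_unsupported(4)  # Conv2d input (N, C, H, W): allowed as before
assert _conv_rank_unsupported(5)      # Conv3d input (N, C, D, H, W): still flagged
```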
7 changes: 5 additions & 2 deletions backends/test/suite/tests/test_reporting.py
@@ -1,3 +1,4 @@
import json
import unittest

from csv import DictReader
@@ -102,14 +103,16 @@ def test_csv_report_simple(self):
self.assertEqual(records[2]["Test Case"], "test2")
self.assertEqual(records[2]["Flow"], "flow1")
self.assertEqual(records[2]["Result"], "Pass")
self.assertEqual(records[2]["Params"], str({"dtype": torch.float32}))
self.assertEqual(records[2]["Params"], json.dumps({"dtype": "torch.float32"}))

# Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
self.assertEqual(records[3]["Test Case"], "test2")
self.assertEqual(records[3]["Flow"], "flow1")
self.assertEqual(records[3]["Result"], "Skip")
self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True}))
self.assertEqual(
records[3]["Params"], json.dumps({"use_dynamic_shapes": "True"})
)

def test_count_ops(self):
"""
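The updated assertions reflect the JSON serialization: values arrive as strings (str(torch.float32) is "torch.float32", str(True) is "True"). For consumers of the report, a sketch of parsing the Params column back out of the CSV (the file name is hypothetical; column names follow the test above):

```python
import json
from csv import DictReader

# Recover structured params from each report row.
with open("test_report.csv", newline="", encoding="utf-8") as f:
    for record in DictReader(f):
        params = json.loads(record["Params"]) if record["Params"] else None
        # e.g. {"dtype": "torch.float32"} or {"use_dynamic_shapes": "True"}
```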