diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh index 243602fea21..d230860875d 100755 --- a/.ci/scripts/test_backend_linux.sh +++ b/.ci/scripts/test_backend_linux.sh @@ -39,12 +39,17 @@ if [[ "$FLOW" == *qnn* ]]; then fi if [[ "$FLOW" == *vulkan* ]]; then - # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate + # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate. source .ci/scripts/setup-vulkan-linux-deps.sh EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON" fi +if [[ "$FLOW" == *arm* ]]; then + # Setup ARM deps. + .ci/scripts/setup-arm-baremetal-tools.sh +fi + # We need the runner to test the built library. PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true diff --git a/.github/workflows/_test_backend.yml b/.github/workflows/_test_backend.yml new file mode 100644 index 00000000000..64ade2d84ad --- /dev/null +++ b/.github/workflows/_test_backend.yml @@ -0,0 +1,84 @@ +name: Test Backend + +on: + workflow_call: + inputs: + backend: + description: 'Backend to test (xnnpack, coreml, vulkan, qnn)' + required: true + type: string + flows: + description: 'JSON array of flows to test' + required: true + type: string + ref: + description: 'Git ref to checkout' + required: false + type: string + default: ${{ github.sha }} + timeout: + description: 'Job timeout in minutes' + required: false + type: number + default: 120 + run-linux: + description: 'Whether to run Linux tests' + required: false + type: boolean + default: false + run-macos: + description: 'Whether to run macOS tests' + required: false + type: boolean + default: false + runner-linux: + description: 'Runner type for Linux jobs' + required: false + type: string + default: linux.4xlarge.memory + +jobs: + test-backend-linux: + if: ${{ inputs.run-linux }} + strategy: + fail-fast: false + matrix: + flow: ${{ fromJSON(inputs.flows) }} + 
suite: [models, operators] + + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9 + with: + ref: ${{ inputs.ref }} + runner: ${{ inputs.runner-linux }} + docker-image: ci-image:executorch-ubuntu-22.04-clang12 + submodules: recursive + timeout: ${{ inputs.timeout }} + upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }} + script: | + set -eux + + source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" + + test-backend-macos: + if: ${{ inputs.run-macos }} + strategy: + fail-fast: false + matrix: + flow: ${{ fromJSON(inputs.flows) }} + suite: [models, operators] + + uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.9 + with: + ref: ${{ inputs.ref }} + runner: macos-m1-stable + python-version: "3.12" + submodules: recursive + timeout: ${{ inputs.timeout }} + upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }} + script: | + set -eux + + # This is needed to get the prebuilt PyTorch wheel from S3 + ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21 + + source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 08ffc0792ff..e49ab85c301 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -27,7 +27,7 @@ jobs: repo-name: pytorch branch: main pin-folder: .ci/docker/ci_commit_pins - test-infra-ref: release/2.9 + test-infra-ref: main updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }} pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }} @@ -36,51 +36,3 @@ jobs: uses: ./.github/workflows/_link_check.yml with: ref: ${{ github.sha }} - - backend-test-linux: - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9 - strategy: - fail-fast: false - matrix: - flow: [ - qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w, - vulkan, vulkan_static_int8_per_channel, - xnnpack, 
xnnpack_dynamic_int8_per_channel, xnnpack_static_int8_per_channel, xnnpack_static_int8_per_tensor - ] - suite: [models, operators] - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - runner: linux.4xlarge.memory - docker-image: ci-image:executorch-ubuntu-22.04-clang12 - submodules: recursive - timeout: 120 - upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }} - script: | - set -eux - - source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" - - backend-test-macos: - uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.9 - permissions: - id-token: write - contents: read - strategy: - fail-fast: false - matrix: - flow: [coreml, coreml_static_int8] - suite: [models, operators] - with: - ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} - runner: macos-m1-stable - python-version: 3.12 - submodules: recursive - timeout: 120 - upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }} - script: | - set -eux - - # This is needed to get the prebuilt PyTorch wheel from S3 - ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21 - - source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}" diff --git a/.github/workflows/test-backend-arm.yml b/.github/workflows/test-backend-arm.yml new file mode 100644 index 00000000000..bee74fee172 --- /dev/null +++ b/.github/workflows/test-backend-arm.yml @@ -0,0 +1,29 @@ +name: Test ARM Backend + +on: + schedule: + - cron: 0 2 * * * + push: + branches: + - release/* + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-arm.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + 
cancel-in-progress: true + +jobs: + test-arm: + uses: ./.github/workflows/_test_backend.yml + with: + backend: arm + flows: '["arm_tosa"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-linux: true diff --git a/.github/workflows/test-backend-coreml.yml b/.github/workflows/test-backend-coreml.yml new file mode 100644 index 00000000000..247f9576595 --- /dev/null +++ b/.github/workflows/test-backend-coreml.yml @@ -0,0 +1,29 @@ +name: Test CoreML Backend + +on: + schedule: + - cron: 0 2 * * * + push: + branches: + - release/* + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-coreml.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-coreml: + uses: ./.github/workflows/_test_backend.yml + with: + backend: coreml + flows: '["coreml", "coreml_static_int8"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-macos: true diff --git a/.github/workflows/test-backend-qnn.yml b/.github/workflows/test-backend-qnn.yml new file mode 100644 index 00000000000..907c4d2dac0 --- /dev/null +++ b/.github/workflows/test-backend-qnn.yml @@ -0,0 +1,30 @@ +name: Test QNN Backend + +on: + schedule: + - cron: 0 2 * * * + push: + branches: + - release/* + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-qnn.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-qnn: + uses: ./.github/workflows/_test_backend.yml + with: + backend: qnn + flows: '["qnn", "qnn_16a16w", 
"qnn_16a8w", "qnn_16a4w", "qnn_16a4w_block", "qnn_8a8w"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-linux: true + runner-linux: linux.8xlarge.memory diff --git a/.github/workflows/test-backend-vulkan.yml b/.github/workflows/test-backend-vulkan.yml new file mode 100644 index 00000000000..cb2478fc825 --- /dev/null +++ b/.github/workflows/test-backend-vulkan.yml @@ -0,0 +1,29 @@ +name: Test Vulkan Backend + +on: + schedule: + - cron: 0 2 * * * + push: + branches: + - release/* + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-vulkan.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-vulkan: + uses: ./.github/workflows/_test_backend.yml + with: + backend: vulkan + flows: '["vulkan", "vulkan_static_int8_per_channel"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-linux: true diff --git a/.github/workflows/test-backend-xnnpack.yml b/.github/workflows/test-backend-xnnpack.yml new file mode 100644 index 00000000000..086c9625a38 --- /dev/null +++ b/.github/workflows/test-backend-xnnpack.yml @@ -0,0 +1,29 @@ +name: Test XNNPACK Backend + +on: + schedule: + - cron: 0 2 * * * + push: + branches: + - release/* + tags: + - ciflow/nightly/* + pull_request: + paths: + - .github/workflows/test-backend-xnnpack.yml + - .github/workflows/_test_backend.yml + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true + +jobs: + test-xnnpack: + uses: ./.github/workflows/_test_backend.yml + with: + backend: xnnpack + flows: '["xnnpack", 
"xnnpack_dynamic_int8_per_channel", "xnnpack_static_int8_per_channel", "xnnpack_static_int8_per_tensor"]' + ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} + timeout: 120 + run-linux: true diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index b7a126eaf35..a4b34fee98d 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,6 +1,6 @@ import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Callable from executorch.backends.test.harness import Tester @@ -35,6 +35,15 @@ class TestFlow: is_delegated: bool = True """ Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """ + skip_patterns: list[str] = field(default_factory=lambda: []) + """ Tests with names containing any substrings in this list are skipped. """ + + supports_serialize: bool = True + """ True if the test flow supports the Serialize stage. """ + + def should_skip_test(self, test_name: str) -> bool: + return any(pattern in test_name for pattern in self.skip_patterns) + def all_flows() -> dict[str, TestFlow]: flows = [] @@ -109,4 +118,13 @@ def all_flows() -> dict[str, TestFlow]: except Exception as e: logger.info(f"Skipping QNN flow registration: {e}") + try: + from executorch.backends.test.suite.flows.arm import ARM_TOSA_FLOW + + flows += [ + ARM_TOSA_FLOW, + ] + except Exception as e: + logger.info(f"Skipping ARM flow registration: {e}") + return {f.name: f for f in flows if f is not None} diff --git a/backends/test/suite/flows/arm.py b/backends/test/suite/flows/arm.py new file mode 100644 index 00000000000..baa2df79de9 --- /dev/null +++ b/backends/test/suite/flows/arm.py @@ -0,0 +1,24 @@ +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.backends.test.suite.flow import TestFlow + + +def _create_arm_tester_tosa_fp(*args, **kwargs) -> ArmTester: + 
kwargs["compile_spec"] = common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+FP") + + return ArmTester( + *args, + **kwargs, + ) + + +def _create_tosa_flow() -> TestFlow: + return TestFlow( + "arm_tosa", + backend="arm", + tester_factory=_create_arm_tester_tosa_fp, + supports_serialize=False, + ) + + +ARM_TOSA_FLOW = _create_tosa_flow() diff --git a/backends/test/suite/flows/coreml.py b/backends/test/suite/flows/coreml.py index fd956b64f05..8a532ff0003 100644 --- a/backends/test/suite/flows/coreml.py +++ b/backends/test/suite/flows/coreml.py @@ -19,6 +19,7 @@ def _create_coreml_flow( CoreMLTester, minimum_deployment_target=minimum_deployment_target ), quantize=quantize, + skip_patterns=["test_argmin", "test_argmax"], ) diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py index 2a8c4e506fa..a3a4fb55aba 100644 --- a/backends/test/suite/flows/vulkan.py +++ b/backends/test/suite/flows/vulkan.py @@ -20,6 +20,7 @@ def _create_vulkan_flow_base( tester_factory=VulkanTester, quantize=quantize_stage_factory is not None, quantize_stage_factory=quantize_stage_factory, + skip_patterns=["float16", "float64"], # Not supported in swiftshader ) diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py index 37bf758fed0..e54fc691723 100644 --- a/backends/test/suite/generate_markdown_summary.py +++ b/backends/test/suite/generate_markdown_summary.py @@ -1,7 +1,58 @@ import argparse import csv +import json import sys +from dataclasses import dataclass, field + + +@dataclass +class ResultCounts: + """ + Represents aggregated result counts for each status. + """ + + total: int = 0 + passes: int = 0 + fails: int = 0 + skips: int = 0 + by_detail: dict[str, int] = field(default_factory=lambda: {}) + + def add_row(self, result_value: str, result_detail: str) -> None: + """ + Update the result counts for the specified row. 
+ """ + + self.total += 1 + + if result_value == "Pass": + self.passes += 1 + elif result_value == "Fail": + self.fails += 1 + elif result_value == "Skip": + self.skips += 1 + else: + raise RuntimeError(f"Unknown result value {result_value}") + + if result_detail: + if result_detail not in self.by_detail: + self.by_detail[result_detail] = 0 + + self.by_detail[result_detail] += 1 + + +@dataclass +class AggregatedSummary: + """ + Represents aggregated summary data for the test run. + """ + + counts: ResultCounts + counts_by_params: dict[str, ResultCounts] + failed_tests: list[list[str]] + header: list[str] + + # # A standalone script to generate a Markdown representation of a test report. # This is primarily intended to be used with GitHub actions to generate a nice @@ -12,14 +63,7 @@ # -def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) - # Print warning if exit code is non-zero - if exit_code != 0: - print("> [!WARNING]") - print( - f"> Exit code {exit_code} was non-zero. Test process may have crashed. 
Check the job logs for more information.\n" - ) - +def aggregate_results(csv_path: str) -> AggregatedSummary: with open(csv_path, newline="", encoding="utf-8") as f: reader = csv.reader(f) rows = list(reader) @@ -27,78 +71,159 @@ def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) header = rows[0] data_rows = rows[1:] - # Find the Result and Result Detail column indices - result_column_index = None - result_detail_column_index = None - for i, col in enumerate(header): - if col.lower() == "result": - result_column_index = i - elif col.lower() == "result detail": - result_detail_column_index = i + header_indices_by_name = {n.lower(): i for (i, n) in enumerate(header)} + params_column_index = header_indices_by_name.get("params", None) + result_column_index = header_indices_by_name["result"] + result_detail_column_index = header_indices_by_name["result detail"] # Count results and prepare data - pass_count = 0 - fail_count = 0 - skip_count = 0 + counts = ResultCounts() failed_tests = [] - processed_rows = [] - result_detail_counts = {} + counts_by_param = {} for row in data_rows: + result = row[result_column_index] + result_detail = row[result_detail_column_index] + + counts.add_row(result, result_detail) + + params = row[params_column_index] if params_column_index else None + if params: + if params not in counts_by_param: + counts_by_param[params] = ResultCounts() + counts_by_param[params].add_row(result, result_detail) + # Make a copy of the row to avoid modifying the original - processed_row = row.copy() + processed_row = [escape_for_markdown(cell) for cell in row] # Count results and collect failed tests if result_column_index is not None and result_column_index < len(row): result_value = row[result_column_index].strip().lower() if result_value == "pass": - pass_count += 1 processed_row[result_column_index] = ( 'Pass' ) elif result_value == "fail": - fail_count += 1 processed_row[result_column_index] = ( 'Fail' ) 
failed_tests.append(processed_row.copy()) elif result_value == "skip": - skip_count += 1 processed_row[result_column_index] = ( 'Skip' ) - # Count result details (excluding empty ones) - if result_detail_column_index is not None and result_detail_column_index < len( - row - ): - result_detail_value = row[result_detail_column_index].strip() - if result_detail_value: # Only count non-empty result details - if result_detail_value in result_detail_counts: - result_detail_counts[result_detail_value] += 1 - else: - result_detail_counts[result_detail_value] = 1 + return AggregatedSummary( + counts=counts, + failed_tests=failed_tests, + counts_by_params=counts_by_param, + header=header, + ) + + +def escape_for_markdown(text: str) -> str: + """ + Modify a string to properly display in a markdown table cell. + """ + if not text: + return text - processed_rows.append(processed_row) + # Replace newlines with <br> tags + escaped = text.replace("\n", "<br>") + + # Escape backslashes. + escaped = escaped.replace("\\", "\\\\") + + # Escape pipe characters that would break table structure + escaped = escaped.replace("|", "\\|") + + return escaped + + +def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901) + # Print warning if exit code is non-zero + if exit_code != 0: + print("> [!WARNING]") + print( + f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n" + ) + + results = aggregate_results(csv_path) # Generate Summary section - total_rows = len(data_rows) print("# Summary\n") - print(f"- **Pass**: {pass_count}/{total_rows}") - print(f"- **Fail**: {fail_count}/{total_rows}") - print(f"- **Skip**: {skip_count}/{total_rows}") + total_excluding_skips = results.counts.passes + results.counts.fails + pass_fraction = results.counts.passes / total_excluding_skips + fail_fraction = results.counts.fails / total_excluding_skips + print( + f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)" + ) + print( + f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)" + ) + print(f"- **Skip**: {results.counts.skips}") + + if results.counts_by_params: + print("\n## Results by Parameters\n") + + # Extract all unique parameter keys from the JSON strings + all_param_keys = set() + parsed_params = {} + + for params_str in results.counts_by_params.keys(): + # Parse the JSON string (it's a string representation of a dict) + params_dict = json.loads(params_str) + parsed_params[params_str] = params_dict + all_param_keys.update(params_dict.keys()) + + if parsed_params and len(parsed_params) > 1: + # Sort parameter keys for consistent column ordering + sorted_param_keys = sorted(all_param_keys) + + # Create table header + header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"] + print("| " + " | ".join(header_cols) + " |") + print("|" + "|".join(["---"] * len(header_cols)) + "|") + + # 
Create table rows + for params_str, counts in results.counts_by_params.items(): + if params_str in parsed_params: + params_dict = parsed_params[params_str] + row_values = [] + + # Add parameter values + for key in sorted_param_keys: + value = params_dict.get(key, "") + row_values.append(str(value)) + + pass_fraction = counts.passes / (counts.passes + counts.fails) + + # Add count values + row_values.extend( + [ + str(counts.passes), + str(counts.fails), + str(counts.skips), + f"{pass_fraction*100:.2f}%", + ] + ) + + print("| " + " | ".join(row_values) + " |") + + print() print("## Failure Breakdown:") - total_rows_with_result_detail = sum(result_detail_counts.values()) - for detail, count in sorted(result_detail_counts.items()): + total_rows_with_result_detail = sum(results.counts.by_detail.values()) + for detail, count in sorted(results.counts.by_detail.items()): print(f"- **{detail}**: {count}/{total_rows_with_result_detail}") # Generate Failed Tests section print("# Failed Tests\n") - if failed_tests: - print("| " + " | ".join(header) + " |") - print("|" + "|".join(["---"] * len(header)) + "|") - for row in failed_tests: + if results.failed_tests: + escaped_header = [escape_for_markdown(col) for col in results.header] + print("| " + " | ".join(escaped_header) + " |") + print("|" + "|".join(["---"] * len(results.header)) + "|") + for row in results.failed_tests: print("| " + " | ".join(row) + " |") else: print("No failed tests.\n") diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py index 65b546b0eb5..ea44275a463 100644 --- a/backends/test/suite/models/__init__.py +++ b/backends/test/suite/models/__init__.py @@ -52,6 +52,11 @@ def wrapped_test(self): "use_dynamic_shapes": use_dynamic_shapes, } with TestContext(test_name, test_func.__name__, flow.name, params): + if flow.should_skip_test(test_name): + raise unittest.SkipTest( + f"Skipping test due to matching flow {flow.name} skip patterns" + ) + test_func(self, flow, 
dtype, use_dynamic_shapes) wrapped_test._name = test_func.__name__ # type: ignore diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py index 6ceb9086f71..9c550b3a49c 100644 --- a/backends/test/suite/operators/__init__.py +++ b/backends/test/suite/operators/__init__.py @@ -97,6 +97,11 @@ def _make_wrapped_test( ): def wrapped_test(self): with TestContext(test_name, test_base_name, flow.name, params): + if flow.should_skip_test(test_name): + raise unittest.SkipTest( + f"Skipping test due to matching flow {flow.name} skip patterns" + ) + test_kwargs = copy.copy(params) or {} test_kwargs["flow"] = flow diff --git a/backends/test/suite/operators/test_abs.py b/backends/test/suite/operators/test_abs.py index fdfc6be671e..484281e294e 100644 --- a/backends/test/suite/operators/test_abs.py +++ b/backends/test/suite/operators/test_abs.py @@ -7,6 +7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -45,6 +47,7 @@ def test_abs_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(AbsModel(), (torch.randn(3, 4, 5),), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_abs_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_amax.py b/backends/test/suite/operators/test_amax.py index 0c9a8c06f0d..04e0b17ae0a 100644 --- a/backends/test/suite/operators/test_amax.py +++ b/backends/test/suite/operators/test_amax.py @@ -6,6 +6,7 @@ # pyre-unsafe +import unittest from typing import List, Optional, Tuple, Union import torch @@ -201,6 +202,7 @@ def test_amax_shapes(self, flow: TestFlow) -> None: flow, ) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_amax_edge_cases(self, flow: TestFlow) -> None: x = torch.tensor([[1.0, float("inf"), 3.0], [4.0, 5.0, float("inf")]]) self._test_op( diff --git a/backends/test/suite/operators/test_amin.py 
b/backends/test/suite/operators/test_amin.py index f4b88b1dade..7aa5c6b7a34 100644 --- a/backends/test/suite/operators/test_amin.py +++ b/backends/test/suite/operators/test_amin.py @@ -6,6 +6,7 @@ # pyre-unsafe +import unittest from typing import List, Optional, Tuple, Union import torch @@ -203,6 +204,7 @@ def test_amin_shapes(self, flow: TestFlow) -> None: flow, ) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_amin_edge_cases(self, flow: TestFlow) -> None: x = torch.tensor([[1.0, float("-inf"), 3.0], [4.0, 5.0, float("-inf")]]) self._test_op( diff --git a/backends/test/suite/operators/test_argmax.py b/backends/test/suite/operators/test_argmax.py index dc8b57fc214..ca3ae9e1805 100644 --- a/backends/test/suite/operators/test_argmax.py +++ b/backends/test/suite/operators/test_argmax.py @@ -6,6 +6,7 @@ # pyre-unsafe +import unittest from typing import Optional import torch @@ -143,6 +144,7 @@ def test_argmax_shapes(self, flow: TestFlow) -> None: flow, ) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_argmax_edge_cases(self, flow: TestFlow) -> None: x = torch.tensor([[1.0, float("inf"), 3.0], [4.0, 5.0, float("inf")]]) self._test_op( diff --git a/backends/test/suite/operators/test_argmin.py b/backends/test/suite/operators/test_argmin.py index d7a24e24f5a..aaf4e9bd167 100644 --- a/backends/test/suite/operators/test_argmin.py +++ b/backends/test/suite/operators/test_argmin.py @@ -6,6 +6,7 @@ # pyre-unsafe +import unittest from typing import Optional import torch @@ -143,6 +144,7 @@ def test_argmin_shapes(self, flow: TestFlow) -> None: flow, ) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_argmin_edge_cases(self, flow: TestFlow) -> None: x = torch.tensor([[1.0, float("-inf"), 3.0], [4.0, 5.0, float("-inf")]]) self._test_op( diff --git a/backends/test/suite/operators/test_ceil.py b/backends/test/suite/operators/test_ceil.py index 198c9e9fe16..4d7c0a5e888 100644 --- 
a/backends/test/suite/operators/test_ceil.py +++ b/backends/test/suite/operators/test_ceil.py @@ -7,6 +7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -45,6 +47,7 @@ def test_ceil_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(CeilModel(), (torch.randn(3, 4, 5),), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_ceil_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_clamp.py b/backends/test/suite/operators/test_clamp.py index 67c61c67caa..49419f0453a 100644 --- a/backends/test/suite/operators/test_clamp.py +++ b/backends/test/suite/operators/test_clamp.py @@ -7,6 +7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -56,6 +58,7 @@ def test_clamp_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(model, (torch.randn(3, 4, 5),), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_clamp_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_exp.py b/backends/test/suite/operators/test_exp.py index bdae5c6a5e6..54196d81ba9 100644 --- a/backends/test/suite/operators/test_exp.py +++ b/backends/test/suite/operators/test_exp.py @@ -7,6 +7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -46,6 +48,7 @@ def test_exp_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(ExpModel(), (torch.randn(3, 4, 5),), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_exp_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_floor.py b/backends/test/suite/operators/test_floor.py index fcc834afa16..bce9f0b4d34 100644 --- a/backends/test/suite/operators/test_floor.py +++ b/backends/test/suite/operators/test_floor.py @@ -7,6 
+7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -42,6 +44,7 @@ def test_floor_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(FloorModel(), (torch.randn(3, 4, 5),), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_floor_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_floor_divide.py b/backends/test/suite/operators/test_floor_divide.py index 87104af11dc..c14151b6181 100644 --- a/backends/test/suite/operators/test_floor_divide.py +++ b/backends/test/suite/operators/test_floor_divide.py @@ -6,6 +6,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -178,6 +180,7 @@ def test_floor_divide_values(self, flow: TestFlow) -> None: y = torch.tensor([-2.0]).expand_as(x).clone() self._test_op(model, (x, y), flow, generate_random_test_inputs=False) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_floor_divide_edge_cases(self, flow: TestFlow) -> None: # Test edge cases model = FloorDivideModel() diff --git a/backends/test/suite/operators/test_log.py b/backends/test/suite/operators/test_log.py index 96ba8da1292..c4af1fe442b 100644 --- a/backends/test/suite/operators/test_log.py +++ b/backends/test/suite/operators/test_log.py @@ -7,6 +7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -46,6 +48,7 @@ def test_log_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(LogModel(), (torch.rand(3, 4, 5) + 0.01,), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_log_edge_cases(self, flow: TestFlow) -> None: # Test edge cases # Tensor with infinity diff --git a/backends/test/suite/operators/test_log10.py b/backends/test/suite/operators/test_log10.py index 7d0e2e111d6..aeb97671f1b 100644 --- a/backends/test/suite/operators/test_log10.py +++ 
b/backends/test/suite/operators/test_log10.py @@ -7,6 +7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -46,6 +48,7 @@ def test_log10_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(Log10Model(), (torch.rand(3, 4, 5) + 0.01,), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_log10_edge_cases(self, flow: TestFlow) -> None: # Test edge cases # Tensor with infinity diff --git a/backends/test/suite/operators/test_log1p.py b/backends/test/suite/operators/test_log1p.py index 383e3116b32..08a5c382076 100644 --- a/backends/test/suite/operators/test_log1p.py +++ b/backends/test/suite/operators/test_log1p.py @@ -7,6 +7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -46,6 +48,7 @@ def test_log1p_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(Log1pModel(), (torch.rand(3, 4, 5) * 2 - 0.5,), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_log1p_edge_cases(self, flow: TestFlow) -> None: # Test edge cases # Tensor with infinity diff --git a/backends/test/suite/operators/test_log2.py b/backends/test/suite/operators/test_log2.py index ddcafaf08d2..16161d334f6 100644 --- a/backends/test/suite/operators/test_log2.py +++ b/backends/test/suite/operators/test_log2.py @@ -7,6 +7,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -46,6 +48,7 @@ def test_log2_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(Log2Model(), (torch.rand(3, 4, 5) + 0.01,), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_log2_edge_cases(self, flow: TestFlow) -> None: # Test edge cases # Tensor with infinity diff --git a/backends/test/suite/operators/test_mean.py b/backends/test/suite/operators/test_mean.py index 746a4b16d9f..6c5c779364b 100644 --- a/backends/test/suite/operators/test_mean.py +++ 
b/backends/test/suite/operators/test_mean.py @@ -6,6 +6,7 @@ # pyre-unsafe +import unittest from typing import List, Optional, Tuple, Union import torch @@ -229,6 +230,7 @@ def test_mean_shapes(self, flow: TestFlow) -> None: flow, ) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_mean_edge_cases(self, flow: TestFlow) -> None: x = torch.tensor([[1.0, float("inf"), 3.0], [4.0, 5.0, float("inf")]]) self._test_op( diff --git a/backends/test/suite/operators/test_median.py b/backends/test/suite/operators/test_median.py index 93823b812ca..0b515d68efd 100644 --- a/backends/test/suite/operators/test_median.py +++ b/backends/test/suite/operators/test_median.py @@ -6,6 +6,7 @@ # pyre-unsafe +import unittest from typing import Optional import torch @@ -167,6 +168,7 @@ def test_median_shapes(self, flow: TestFlow) -> None: # 5D tensor self._test_op(MedianValueOnlyModel(), (torch.randn(2, 2, 3, 4, 5),), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_median_edge_cases(self, flow: TestFlow) -> None: # Tensor with NaN (NaN should be propagated) x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]]) diff --git a/backends/test/suite/operators/test_neg.py b/backends/test/suite/operators/test_neg.py index 35c9d851817..bc1adede877 100644 --- a/backends/test/suite/operators/test_neg.py +++ b/backends/test/suite/operators/test_neg.py @@ -6,6 +6,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -55,6 +57,7 @@ def test_neg_shapes(self, flow: TestFlow) -> None: NegModel(), (torch.randn(3, 4, 5),), flow, generate_random_test_inputs=False ) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_neg_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_pow.py b/backends/test/suite/operators/test_pow.py index 334038d73d3..3082ad6ebaf 100644 --- a/backends/test/suite/operators/test_pow.py +++ 
b/backends/test/suite/operators/test_pow.py @@ -6,6 +6,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -127,6 +129,7 @@ def test_pow_shapes(self, flow: TestFlow) -> None: model, (torch.rand(3, 4, 5) + 0.1,), flow, generate_random_test_inputs=False ) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_pow_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_round.py b/backends/test/suite/operators/test_round.py index ca8e6368d48..3a3577bea32 100644 --- a/backends/test/suite/operators/test_round.py +++ b/backends/test/suite/operators/test_round.py @@ -6,6 +6,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -52,6 +54,7 @@ def test_round_values(self, flow: TestFlow) -> None: x = torch.arange(-5, 5, 0.5) # [-5.0, -4.5, -4.0, ..., 4.0, 4.5] self._test_op(RoundModel(), (x,), flow, generate_random_test_inputs=False) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_round_edge_cases(self, flow: TestFlow) -> None: # Test edge cases @@ -98,6 +101,7 @@ def test_round_decimals(self, flow: TestFlow) -> None: RoundModel(decimals=-2), (x,), flow, generate_random_test_inputs=False ) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_round_decimals_edge_cases(self, flow: TestFlow) -> None: # Test edge cases with decimal places diff --git a/backends/test/suite/operators/test_rsqrt.py b/backends/test/suite/operators/test_rsqrt.py index 175bbcdb2cc..705833194fb 100644 --- a/backends/test/suite/operators/test_rsqrt.py +++ b/backends/test/suite/operators/test_rsqrt.py @@ -6,6 +6,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -45,6 +47,7 @@ def test_rsqrt_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(RsqrtModel(), (torch.rand(3, 4, 5) + 0.01,), flow) + 
@unittest.skip("NaN and Inf are not enforced for backends.") def test_rsqrt_edge_cases(self, flow: TestFlow) -> None: # Tensor with infinity x = torch.tensor([float("inf"), 1.0, 4.0]) diff --git a/backends/test/suite/operators/test_sqrt.py b/backends/test/suite/operators/test_sqrt.py index c3874dcb209..3d327ade6a5 100644 --- a/backends/test/suite/operators/test_sqrt.py +++ b/backends/test/suite/operators/test_sqrt.py @@ -6,6 +6,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -45,6 +47,7 @@ def test_sqrt_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(SqrtModel(), (torch.rand(3, 4, 5),), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_sqrt_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_square.py b/backends/test/suite/operators/test_square.py index 52cd739bf9f..39ed212e426 100644 --- a/backends/test/suite/operators/test_square.py +++ b/backends/test/suite/operators/test_square.py @@ -6,6 +6,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -44,6 +46,7 @@ def test_square_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(SquareModel(), (torch.randn(3, 4, 5),), flow) + @unittest.skip("NaN and Inf are not enforced for backends.") def test_square_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/operators/test_trunc.py b/backends/test/suite/operators/test_trunc.py index 1d6d18817bd..71dcbf59176 100644 --- a/backends/test/suite/operators/test_trunc.py +++ b/backends/test/suite/operators/test_trunc.py @@ -6,6 +6,8 @@ # pyre-unsafe +import unittest + import torch from executorch.backends.test.suite.flow import TestFlow @@ -44,6 +46,7 @@ def test_trunc_shapes(self, flow: TestFlow) -> None: # 3D tensor self._test_op(TruncModel(), (torch.randn(3, 4, 5) * 5,), flow) + @unittest.skip("NaN and Inf 
are not enforced for backends.") def test_trunc_edge_cases(self, flow: TestFlow) -> None: # Test edge cases diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index ce8a48dcc12..09e950ab672 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -1,4 +1,5 @@ import csv +import json from collections import Counter from dataclasses import dataclass, field @@ -45,6 +46,8 @@ ] ) +CSV_FIELD_NAMES.append("Error") + # Operators that are excluded from the counts returned by count_ops. These are used to # exclude operatations that are not logically relevant or delegatable to backends. @@ -341,7 +344,9 @@ def _sum_op_counts(counter: Counter | None) -> int | None: def _serialize_params(params: dict[str, Any] | None) -> str: if params is not None: - return str(dict(sorted(params.items()))) + # Convert values to strings - JSON conversion doesn't like dtypes. + str_params = {k: str(v) for k, v in params.items()} + return json.dumps(str_params) else: return "" @@ -365,6 +370,15 @@ def write_csv_header(output: TextIO): def write_csv_row(record: TestCaseSummary, output: TextIO): writer = csv.DictWriter(output, CSV_FIELD_NAMES) + # Truncate error message if it's too long, keeping first and last 200 characters + error_message = "" + if record.error is not None: + error_str = str(record.error) + if len(error_str) > 400: + error_message = error_str[:200] + "..." 
+ error_str[-200:] + else: + error_message = error_str + row = { "Test ID": record.name, "Test Case": record.base_name, @@ -373,6 +387,7 @@ def write_csv_row(record: TestCaseSummary, output: TextIO): "Params": _serialize_params(record.params), "Result": record.result.to_short_str(), "Result Detail": record.result.to_detail_str(), + "Error": error_message, "Delegated": "True" if record.is_delegated() else "False", "Quantize Time (s)": ( f"{record.quantize_time.total_seconds():.3f}" diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py index 1f84db9c730..a6d7d07bce0 100644 --- a/backends/test/suite/runner.py +++ b/backends/test/suite/runner.py @@ -15,6 +15,7 @@ UNSUPPORTED_PORTABLE_OPS = { "aten::_embedding_bag", "aten::_adaptive_avg_pool2d", + "aten::adaptive_max_pool2d", "aten::median", "aten::median.dim", "aten::round.decimals", @@ -34,6 +35,7 @@ TestResult, ) from executorch.exir import EdgeProgramManager +from executorch.exir.dialects._ops import ops as exir_ops # A list of all runnable test suites and the corresponding python package. @@ -43,6 +45,24 @@ } +def _graph_has_unsupported_patterns(program: torch.export.ExportedProgram) -> bool: + # Returns true if the model contains patterns that will fail when running on the ET + # portable kernel library. + + # Check for 3d convolutions. All convs (1d, 2d, 3d) use the same op, so we need to look at + # the input meta to determine the rank. + for node in program.graph.nodes: + if ( + node.op == "call_function" + and node.target == exir_ops.edge.aten.convolution.default + ): + in_rank = node.args[0].meta["val"].dim() + if in_rank > 4: + return True + + return False + + def _get_test_seed(test_base_name: str) -> int: # Set the seed based on the test base name to give consistent inputs between backends. Add the # run seed to allow for reproducible results, but still allow for run-to-run variation. @@ -162,7 +182,7 @@ def build_result( # Check if any undelegated ops are in the unsupported ops set. 
has_unsupported_ops = any( op in UNSUPPORTED_PORTABLE_OPS for op in undelegated_op_counts.keys() - ) + ) or _graph_has_unsupported_patterns(edge_manager._etrecord.edge_dialect_program) # Skip the test if there are unsupported portable ops remaining. if has_unsupported_ops: @@ -171,8 +191,11 @@ def build_result( # Only run the runtime portion if something was delegated (or the flow doesn't delegate) if is_delegated or not flow.is_delegated: try: - tester.to_executorch().serialize() - extra_stats["pte_size_bytes"] = len(tester.get_artifact()) + tester.to_executorch() + + if flow.supports_serialize: + tester.serialize() + extra_stats["pte_size_bytes"] = len(tester.get_artifact()) except Exception as e: # We could introduce a result value for this, but I'm not sure it's necessary. # We can do this if we ever see to_executorch() or serialize() fail due a backend issue. diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py index 58ff76cba17..e42681fc678 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -1,3 +1,4 @@ +import json import unittest from csv import DictReader @@ -102,14 +103,16 @@ def test_csv_report_simple(self): self.assertEqual(records[2]["Test Case"], "test2") self.assertEqual(records[2]["Flow"], "flow1") self.assertEqual(records[2]["Result"], "Pass") - self.assertEqual(records[2]["Params"], str({"dtype": torch.float32})) + self.assertEqual(records[2]["Params"], json.dumps({"dtype": "torch.float32"})) # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1") self.assertEqual(records[3]["Test Case"], "test2") self.assertEqual(records[3]["Flow"], "flow1") self.assertEqual(records[3]["Result"], "Skip") - self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True})) + self.assertEqual( + records[3]["Params"], json.dumps({"use_dynamic_shapes": "True"}) 
+ ) def test_count_ops(self): """