diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh
index 243602fea21..d230860875d 100755
--- a/.ci/scripts/test_backend_linux.sh
+++ b/.ci/scripts/test_backend_linux.sh
@@ -39,12 +39,17 @@ if [[ "$FLOW" == *qnn* ]]; then
 fi
 
 if [[ "$FLOW" == *vulkan* ]]; then
-    # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+    # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate.
     source .ci/scripts/setup-vulkan-linux-deps.sh
 
     EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
 fi
 
+if [[ "$FLOW" == *arm* ]]; then
+    # Setup ARM deps.
+    .ci/scripts/setup-arm-baremetal-tools.sh
+fi
+
 # We need the runner to test the built library.
 PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
 
diff --git a/.github/workflows/_test_backend.yml b/.github/workflows/_test_backend.yml
new file mode 100644
index 00000000000..64ade2d84ad
--- /dev/null
+++ b/.github/workflows/_test_backend.yml
@@ -0,0 +1,84 @@
+name: Test Backend
+
+on:
+  workflow_call:
+    inputs:
+      backend:
+        description: 'Backend to test (xnnpack, coreml, vulkan, qnn, arm)'
+        required: true
+        type: string
+      flows:
+        description: 'JSON array of flows to test'
+        required: true
+        type: string
+      ref:
+        description: 'Git ref to checkout'
+        required: false
+        type: string
+        default: ${{ github.sha }}
+      timeout:
+        description: 'Job timeout in minutes'
+        required: false
+        type: number
+        default: 120
+      run-linux:
+        description: 'Whether to run Linux tests'
+        required: false
+        type: boolean
+        default: false
+      run-macos:
+        description: 'Whether to run macOS tests'
+        required: false
+        type: boolean
+        default: false
+      runner-linux:
+        description: 'Runner type for Linux jobs'
+        required: false
+        type: string
+        default: linux.4xlarge.memory
+
+jobs:
+  test-backend-linux:
+    if: ${{ inputs.run-linux }}
+    strategy:
+      fail-fast: false
+      matrix:
+        flow: ${{ fromJSON(inputs.flows) }}
+        suite: [models, operators]
+
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
+    with:
+      ref: ${{ inputs.ref }}
+      runner: ${{ inputs.runner-linux }}
+      docker-image: ci-image:executorch-ubuntu-22.04-clang12
+      submodules: recursive
+      timeout: ${{ inputs.timeout }}
+      upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
+      script: |
+        set -eux
+
+        source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
+
+  test-backend-macos:
+    if: ${{ inputs.run-macos }}
+    strategy:
+      fail-fast: false
+      matrix:
+        flow: ${{ fromJSON(inputs.flows) }}
+        suite: [models, operators]
+
+    uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.9
+    with:
+      ref: ${{ inputs.ref }}
+      runner: macos-m1-stable
+      python-version: "3.12"
+      submodules: recursive
+      timeout: ${{ inputs.timeout }}
+      upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
+      script: |
+        set -eux
+
+        # This is needed to get the prebuilt PyTorch wheel from S3
+        ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
+
+        source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml
index 08ffc0792ff..e49ab85c301 100644
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@@ -27,7 +27,7 @@ jobs:
           repo-name: pytorch
           branch: main
           pin-folder: .ci/docker/ci_commit_pins
-          test-infra-ref: release/2.9
+          test-infra-ref: main
           updatebot-token: ${{ secrets.UPDATEBOT_TOKEN }}
           pytorchbot-token: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
 
@@ -36,51 +36,3 @@ jobs:
     uses: ./.github/workflows/_link_check.yml
     with:
       ref: ${{ github.sha }}
-
-  backend-test-linux:
-    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@release/2.9
-    strategy:
-      fail-fast: false
-      matrix:
-        flow: [
-          qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w,
-          vulkan, vulkan_static_int8_per_channel,
-          xnnpack, xnnpack_dynamic_int8_per_channel, xnnpack_static_int8_per_channel, xnnpack_static_int8_per_tensor
-        ]
-        suite: [models, operators]
-    with:
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      runner: linux.4xlarge.memory
-      docker-image: ci-image:executorch-ubuntu-22.04-clang12
-      submodules: recursive
-      timeout: 120
-      upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
-      script: |
-        set -eux
-
-        source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
-
-  backend-test-macos:
-    uses: pytorch/test-infra/.github/workflows/macos_job.yml@release/2.9
-    permissions:
-      id-token: write
-      contents: read
-    strategy:
-      fail-fast: false
-      matrix:
-        flow: [coreml, coreml_static_int8]
-        suite: [models, operators]
-    with:
-      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
-      runner: macos-m1-stable
-      python-version: 3.12
-      submodules: recursive
-      timeout: 120
-      upload-artifact: test-report-${{ matrix.flow }}-${{ matrix.suite }}
-      script: |
-        set -eux
-
-        # This is needed to get the prebuilt PyTorch wheel from S3
-        ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
-
-        source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
diff --git a/.github/workflows/test-backend-arm.yml b/.github/workflows/test-backend-arm.yml
new file mode 100644
index 00000000000..bee74fee172
--- /dev/null
+++ b/.github/workflows/test-backend-arm.yml
@@ -0,0 +1,29 @@
+name: Test ARM Backend
+
+on:
+  schedule:
+    - cron: 0 2 * * *
+  push:
+    branches:
+      - release/*
+    tags:
+      - ciflow/nightly/*
+  pull_request:
+    paths:
+      - .github/workflows/test-backend-arm.yml
+      - .github/workflows/_test_backend.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  test-arm:
+    uses: ./.github/workflows/_test_backend.yml
+    with:
+      backend: arm
+      flows: '["arm_tosa"]'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      run-linux: true
diff --git a/.github/workflows/test-backend-coreml.yml b/.github/workflows/test-backend-coreml.yml
new file mode 100644
index 00000000000..247f9576595
--- /dev/null
+++ b/.github/workflows/test-backend-coreml.yml
@@ -0,0 +1,29 @@
+name: Test CoreML Backend
+
+on:
+  schedule:
+    - cron: 0 2 * * *
+  push:
+    branches:
+      - release/*
+    tags:
+      - ciflow/nightly/*
+  pull_request:
+    paths:
+      - .github/workflows/test-backend-coreml.yml
+      - .github/workflows/_test_backend.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  test-coreml:
+    uses: ./.github/workflows/_test_backend.yml
+    with:
+      backend: coreml
+      flows: '["coreml", "coreml_static_int8"]'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      run-macos: true
diff --git a/.github/workflows/test-backend-qnn.yml b/.github/workflows/test-backend-qnn.yml
new file mode 100644
index 00000000000..907c4d2dac0
--- /dev/null
+++ b/.github/workflows/test-backend-qnn.yml
@@ -0,0 +1,30 @@
+name: Test QNN Backend
+
+on:
+  schedule:
+    - cron: 0 2 * * *
+  push:
+    branches:
+      - release/*
+    tags:
+      - ciflow/nightly/*
+  pull_request:
+    paths:
+      - .github/workflows/test-backend-qnn.yml
+      - .github/workflows/_test_backend.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  test-qnn:
+    uses: ./.github/workflows/_test_backend.yml
+    with:
+      backend: qnn
+      flows: '["qnn", "qnn_16a16w", "qnn_16a8w", "qnn_16a4w", "qnn_16a4w_block", "qnn_8a8w"]'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      run-linux: true
+      runner-linux: linux.8xlarge.memory
diff --git a/.github/workflows/test-backend-vulkan.yml b/.github/workflows/test-backend-vulkan.yml
new file mode 100644
index 00000000000..cb2478fc825
--- /dev/null
+++ b/.github/workflows/test-backend-vulkan.yml
@@ -0,0 +1,29 @@
+name: Test Vulkan Backend
+
+on:
+  schedule:
+    - cron: 0 2 * * *
+  push:
+    branches:
+      - release/*
+    tags:
+      - ciflow/nightly/*
+  pull_request:
+    paths:
+      - .github/workflows/test-backend-vulkan.yml
+      - .github/workflows/_test_backend.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  test-vulkan:
+    uses: ./.github/workflows/_test_backend.yml
+    with:
+      backend: vulkan
+      flows: '["vulkan", "vulkan_static_int8_per_channel"]'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      run-linux: true
diff --git a/.github/workflows/test-backend-xnnpack.yml b/.github/workflows/test-backend-xnnpack.yml
new file mode 100644
index 00000000000..086c9625a38
--- /dev/null
+++ b/.github/workflows/test-backend-xnnpack.yml
@@ -0,0 +1,29 @@
+name: Test XNNPACK Backend
+
+on:
+  schedule:
+    - cron: 0 2 * * *
+  push:
+    branches:
+      - release/*
+    tags:
+      - ciflow/nightly/*
+  pull_request:
+    paths:
+      - .github/workflows/test-backend-xnnpack.yml
+      - .github/workflows/_test_backend.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  test-xnnpack:
+    uses: ./.github/workflows/_test_backend.yml
+    with:
+      backend: xnnpack
+      flows: '["xnnpack", "xnnpack_dynamic_int8_per_channel", "xnnpack_static_int8_per_channel", "xnnpack_static_int8_per_tensor"]'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      run-linux: true
diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py
index b7a126eaf35..a4b34fee98d 100644
--- a/backends/test/suite/flow.py
+++ b/backends/test/suite/flow.py
@@ -1,6 +1,6 @@
 import logging
 
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Callable
 
 from executorch.backends.test.harness import Tester
@@ -35,6 +35,15 @@ class TestFlow:
     is_delegated: bool = True
     """ Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """
 
+    skip_patterns: list[str] = field(default_factory=lambda: [])
+    """ Tests with names containing any substrings in this list are skipped. """
+
+    supports_serialize: bool = True
+    """ True if the test flow supports the Serialize stage. """
+
+    def should_skip_test(self, test_name: str) -> bool:
+        return any(pattern in test_name for pattern in self.skip_patterns)
+
 
 def all_flows() -> dict[str, TestFlow]:
     flows = []
@@ -109,4 +118,13 @@ def all_flows() -> dict[str, TestFlow]:
     except Exception as e:
         logger.info(f"Skipping QNN flow registration: {e}")
 
+    try:
+        from executorch.backends.test.suite.flows.arm import ARM_TOSA_FLOW
+
+        flows += [
+            ARM_TOSA_FLOW,
+        ]
+    except Exception as e:
+        logger.info(f"Skipping ARM flow registration: {e}")
+
     return {f.name: f for f in flows if f is not None}
diff --git a/backends/test/suite/flows/arm.py b/backends/test/suite/flows/arm.py
new file mode 100644
index 00000000000..baa2df79de9
--- /dev/null
+++ b/backends/test/suite/flows/arm.py
@@ -0,0 +1,24 @@
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.test.suite.flow import TestFlow
+
+
+def _create_arm_tester_tosa_fp(*args, **kwargs) -> ArmTester:
+    kwargs["compile_spec"] = common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+FP")
+
+    return ArmTester(
+        *args,
+        **kwargs,
+    )
+
+
+def _create_tosa_flow() -> TestFlow:
+    return TestFlow(
+        "arm_tosa",
+        backend="arm",
+        tester_factory=_create_arm_tester_tosa_fp,
+        supports_serialize=False,
+    )
+
+
+ARM_TOSA_FLOW = _create_tosa_flow()
diff --git a/backends/test/suite/flows/coreml.py b/backends/test/suite/flows/coreml.py
index fd956b64f05..8a532ff0003 100644
--- a/backends/test/suite/flows/coreml.py
+++ b/backends/test/suite/flows/coreml.py
@@ -19,6 +19,7 @@ def _create_coreml_flow(
             CoreMLTester, minimum_deployment_target=minimum_deployment_target
         ),
         quantize=quantize,
+        skip_patterns=["test_argmin", "test_argmax"],
     )
 
 
diff --git a/backends/test/suite/flows/vulkan.py b/backends/test/suite/flows/vulkan.py
index 2a8c4e506fa..a3a4fb55aba 100644
--- a/backends/test/suite/flows/vulkan.py
+++ b/backends/test/suite/flows/vulkan.py
@@ -20,6 +20,7 @@ def _create_vulkan_flow_base(
         tester_factory=VulkanTester,
         quantize=quantize_stage_factory is not None,
         quantize_stage_factory=quantize_stage_factory,
+        skip_patterns=["float16", "float64"],  # Not supported in swiftshader
     )
 
 
diff --git a/backends/test/suite/generate_markdown_summary.py b/backends/test/suite/generate_markdown_summary.py
index 37bf758fed0..e54fc691723 100644
--- a/backends/test/suite/generate_markdown_summary.py
+++ b/backends/test/suite/generate_markdown_summary.py
@@ -1,7 +1,58 @@
 import argparse
 import csv
+import json
 import sys
 
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ResultCounts:
+    """
+    Represents aggregated result counts for each status.
+    """
+
+    total: int = 0
+    passes: int = 0
+    fails: int = 0
+    skips: int = 0
+    by_detail: dict[str, int] = field(default_factory=lambda: {})
+
+    def add_row(self, result_value: str, result_detail: str) -> None:
+        """
+        Update the result counts for the specified row.
+        """
+
+        self.total += 1
+
+        if result_value == "Pass":
+            self.passes += 1
+        elif result_value == "Fail":
+            self.fails += 1
+        elif result_value == "Skip":
+            self.skips += 1
+        else:
+            raise RuntimeError(f"Unknown result value {result_value}")
+
+        if result_detail:
+            if result_detail not in self.by_detail:
+                self.by_detail[result_detail] = 0
+
+            self.by_detail[result_detail] += 1
+
+
+@dataclass
+class AggregatedSummary:
+    """
+    Represents aggregated summary data for the test run.
+    """
+
+    counts: ResultCounts
+    counts_by_params: dict[str, ResultCounts]
+    failed_tests: list[list[str]]
+    header: list[str]
+
+
 #
 # A standalone script to generate a Markdown representation of a test report.
 # This is primarily intended to be used with GitHub actions to generate a nice
@@ -12,14 +63,7 @@
 #
 
 
-def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
-    # Print warning if exit code is non-zero
-    if exit_code != 0:
-        print("> [!WARNING]")
-        print(
-            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
-        )
-
+def aggregate_results(csv_path: str) -> AggregatedSummary:
     with open(csv_path, newline="", encoding="utf-8") as f:
         reader = csv.reader(f)
         rows = list(reader)
@@ -27,78 +71,159 @@ def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
     header = rows[0]
     data_rows = rows[1:]
 
-    # Find the Result and Result Detail column indices
-    result_column_index = None
-    result_detail_column_index = None
-    for i, col in enumerate(header):
-        if col.lower() == "result":
-            result_column_index = i
-        elif col.lower() == "result detail":
-            result_detail_column_index = i
+    header_indices_by_name = {n.lower(): i for (i, n) in enumerate(header)}
+    params_column_index = header_indices_by_name.get("params", None)
+    result_column_index = header_indices_by_name["result"]
+    result_detail_column_index = header_indices_by_name["result detail"]
 
     # Count results and prepare data
-    pass_count = 0
-    fail_count = 0
-    skip_count = 0
+    counts = ResultCounts()
     failed_tests = []
-    processed_rows = []
-    result_detail_counts = {}
+    counts_by_param = {}
 
     for row in data_rows:
+        result = row[result_column_index]
+        result_detail = row[result_detail_column_index]
+
+        counts.add_row(result, result_detail)
+
+        params = row[params_column_index] if params_column_index is not None else None
+        if params:
+            if params not in counts_by_param:
+                counts_by_param[params] = ResultCounts()
+            counts_by_param[params].add_row(result, result_detail)
+
         # Make a copy of the row to avoid modifying the original
-        processed_row = row.copy()
+        processed_row = [escape_for_markdown(cell) for cell in row]
 
         # Count results and collect failed tests
         if result_column_index is not None and result_column_index < len(row):
             result_value = row[result_column_index].strip().lower()
             if result_value == "pass":
-                pass_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:green">Pass</span>'
                 )
             elif result_value == "fail":
-                fail_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:red">Fail</span>'
                 )
                 failed_tests.append(processed_row.copy())
             elif result_value == "skip":
-                skip_count += 1
                 processed_row[result_column_index] = (
                     '<span style="color:gray">Skip</span>'
                 )
 
-        # Count result details (excluding empty ones)
-        if result_detail_column_index is not None and result_detail_column_index < len(
-            row
-        ):
-            result_detail_value = row[result_detail_column_index].strip()
-            if result_detail_value:  # Only count non-empty result details
-                if result_detail_value in result_detail_counts:
-                    result_detail_counts[result_detail_value] += 1
-                else:
-                    result_detail_counts[result_detail_value] = 1
+    return AggregatedSummary(
+        counts=counts,
+        failed_tests=failed_tests,
+        counts_by_params=counts_by_param,
+        header=header,
+    )
+
+
+def escape_for_markdown(text: str) -> str:
+    """
+    Modify a string to properly display in a markdown table cell.
+    """
+    if not text:
+        return text
 
-        processed_rows.append(processed_row)
+    # Replace newlines with <br /> tags
+    escaped = text.replace("\n", "<br />")
+
+    # Escape backslashes.
+    escaped = escaped.replace("\\", "\\\\")
+
+    # Escape pipe characters that would break table structure
+    escaped = escaped.replace("|", "\\|")
+
+    return escaped
+
+
+def generate_markdown(csv_path: str, exit_code: int = 0):  # noqa (C901)
+    # Print warning if exit code is non-zero
+    if exit_code != 0:
+        print("> [!WARNING]")
+        print(
+            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
+        )
+
+    results = aggregate_results(csv_path)
 
     # Generate Summary section
-    total_rows = len(data_rows)
     print("# Summary\n")
-    print(f"- **Pass**: {pass_count}/{total_rows}")
-    print(f"- **Fail**: {fail_count}/{total_rows}")
-    print(f"- **Skip**: {skip_count}/{total_rows}")
+    total_excluding_skips = results.counts.passes + results.counts.fails
+    pass_fraction = results.counts.passes / max(total_excluding_skips, 1)  # avoid 0/0
+    fail_fraction = results.counts.fails / max(total_excluding_skips, 1)
+    print(
+        f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
+    )
+    print(
+        f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
+    )
+    print(f"- **Skip**: {results.counts.skips}")
+
+    if results.counts_by_params:
+        print("\n## Results by Parameters\n")
+
+        # Extract all unique parameter keys from the JSON strings
+        all_param_keys = set()
+        parsed_params = {}
+
+        for params_str in results.counts_by_params.keys():
+            # Parse the JSON string (it's a string representation of a dict)
+            params_dict = json.loads(params_str)
+            parsed_params[params_str] = params_dict
+            all_param_keys.update(params_dict.keys())
+
+        if parsed_params and len(parsed_params) > 1:
+            # Sort parameter keys for consistent column ordering
+            sorted_param_keys = sorted(all_param_keys)
+
+            # Create table header
+            header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
+            print("| " + " | ".join(header_cols) + " |")
+            print("|" + "|".join(["---"] * len(header_cols)) + "|")
+
+            # Create table rows
+            for params_str, counts in results.counts_by_params.items():
+                if params_str in parsed_params:
+                    params_dict = parsed_params[params_str]
+                    row_values = []
+
+                    # Add parameter values
+                    for key in sorted_param_keys:
+                        value = params_dict.get(key, "")
+                        row_values.append(str(value))
+
+                    pass_fraction = counts.passes / max(counts.passes + counts.fails, 1)
+
+                    # Add count values
+                    row_values.extend(
+                        [
+                            str(counts.passes),
+                            str(counts.fails),
+                            str(counts.skips),
+                            f"{pass_fraction*100:.2f}%",
+                        ]
+                    )
+
+                    print("| " + " | ".join(row_values) + " |")
+
+        print()
 
     print("## Failure Breakdown:")
-    total_rows_with_result_detail = sum(result_detail_counts.values())
-    for detail, count in sorted(result_detail_counts.items()):
+    total_rows_with_result_detail = sum(results.counts.by_detail.values())
+    for detail, count in sorted(results.counts.by_detail.items()):
         print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")
 
     # Generate Failed Tests section
     print("# Failed Tests\n")
-    if failed_tests:
-        print("| " + " | ".join(header) + " |")
-        print("|" + "|".join(["---"] * len(header)) + "|")
-        for row in failed_tests:
+    if results.failed_tests:
+        escaped_header = [escape_for_markdown(col) for col in results.header]
+        print("| " + " | ".join(escaped_header) + " |")
+        print("|" + "|".join(["---"] * len(results.header)) + "|")
+        for row in results.failed_tests:
             print("| " + " | ".join(row) + " |")
     else:
         print("No failed tests.\n")
diff --git a/backends/test/suite/models/__init__.py b/backends/test/suite/models/__init__.py
index 65b546b0eb5..ea44275a463 100644
--- a/backends/test/suite/models/__init__.py
+++ b/backends/test/suite/models/__init__.py
@@ -52,6 +52,11 @@ def wrapped_test(self):
             "use_dynamic_shapes": use_dynamic_shapes,
         }
         with TestContext(test_name, test_func.__name__, flow.name, params):
+            if flow.should_skip_test(test_name):
+                raise unittest.SkipTest(
+                    f"Skipping test due to matching flow {flow.name} skip patterns"
+                )
+
             test_func(self, flow, dtype, use_dynamic_shapes)
 
     wrapped_test._name = test_func.__name__  # type: ignore
diff --git a/backends/test/suite/operators/__init__.py b/backends/test/suite/operators/__init__.py
index 6ceb9086f71..9c550b3a49c 100644
--- a/backends/test/suite/operators/__init__.py
+++ b/backends/test/suite/operators/__init__.py
@@ -97,6 +97,11 @@ def _make_wrapped_test(
 ):
     def wrapped_test(self):
         with TestContext(test_name, test_base_name, flow.name, params):
+            if flow.should_skip_test(test_name):
+                raise unittest.SkipTest(
+                    f"Skipping test due to matching flow {flow.name} skip patterns"
+                )
+
             test_kwargs = copy.copy(params) or {}
             test_kwargs["flow"] = flow
 
diff --git a/backends/test/suite/operators/test_abs.py b/backends/test/suite/operators/test_abs.py
index fdfc6be671e..484281e294e 100644
--- a/backends/test/suite/operators/test_abs.py
+++ b/backends/test/suite/operators/test_abs.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -45,6 +47,7 @@ def test_abs_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(AbsModel(), (torch.randn(3, 4, 5),), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_abs_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_amax.py b/backends/test/suite/operators/test_amax.py
index 0c9a8c06f0d..04e0b17ae0a 100644
--- a/backends/test/suite/operators/test_amax.py
+++ b/backends/test/suite/operators/test_amax.py
@@ -6,6 +6,7 @@
 
 # pyre-unsafe
 
+import unittest
 from typing import List, Optional, Tuple, Union
 
 import torch
@@ -201,6 +202,7 @@ def test_amax_shapes(self, flow: TestFlow) -> None:
             flow,
         )
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_amax_edge_cases(self, flow: TestFlow) -> None:
         x = torch.tensor([[1.0, float("inf"), 3.0], [4.0, 5.0, float("inf")]])
         self._test_op(
diff --git a/backends/test/suite/operators/test_amin.py b/backends/test/suite/operators/test_amin.py
index f4b88b1dade..7aa5c6b7a34 100644
--- a/backends/test/suite/operators/test_amin.py
+++ b/backends/test/suite/operators/test_amin.py
@@ -6,6 +6,7 @@
 
 # pyre-unsafe
 
+import unittest
 from typing import List, Optional, Tuple, Union
 
 import torch
@@ -203,6 +204,7 @@ def test_amin_shapes(self, flow: TestFlow) -> None:
             flow,
         )
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_amin_edge_cases(self, flow: TestFlow) -> None:
         x = torch.tensor([[1.0, float("-inf"), 3.0], [4.0, 5.0, float("-inf")]])
         self._test_op(
diff --git a/backends/test/suite/operators/test_argmax.py b/backends/test/suite/operators/test_argmax.py
index dc8b57fc214..ca3ae9e1805 100644
--- a/backends/test/suite/operators/test_argmax.py
+++ b/backends/test/suite/operators/test_argmax.py
@@ -6,6 +6,7 @@
 
 # pyre-unsafe
 
+import unittest
 from typing import Optional
 
 import torch
@@ -143,6 +144,7 @@ def test_argmax_shapes(self, flow: TestFlow) -> None:
             flow,
         )
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_argmax_edge_cases(self, flow: TestFlow) -> None:
         x = torch.tensor([[1.0, float("inf"), 3.0], [4.0, 5.0, float("inf")]])
         self._test_op(
diff --git a/backends/test/suite/operators/test_argmin.py b/backends/test/suite/operators/test_argmin.py
index d7a24e24f5a..aaf4e9bd167 100644
--- a/backends/test/suite/operators/test_argmin.py
+++ b/backends/test/suite/operators/test_argmin.py
@@ -6,6 +6,7 @@
 
 # pyre-unsafe
 
+import unittest
 from typing import Optional
 
 import torch
@@ -143,6 +144,7 @@ def test_argmin_shapes(self, flow: TestFlow) -> None:
             flow,
         )
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_argmin_edge_cases(self, flow: TestFlow) -> None:
         x = torch.tensor([[1.0, float("-inf"), 3.0], [4.0, 5.0, float("-inf")]])
         self._test_op(
diff --git a/backends/test/suite/operators/test_ceil.py b/backends/test/suite/operators/test_ceil.py
index 198c9e9fe16..4d7c0a5e888 100644
--- a/backends/test/suite/operators/test_ceil.py
+++ b/backends/test/suite/operators/test_ceil.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -45,6 +47,7 @@ def test_ceil_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(CeilModel(), (torch.randn(3, 4, 5),), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_ceil_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_clamp.py b/backends/test/suite/operators/test_clamp.py
index 67c61c67caa..49419f0453a 100644
--- a/backends/test/suite/operators/test_clamp.py
+++ b/backends/test/suite/operators/test_clamp.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -56,6 +58,7 @@ def test_clamp_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(model, (torch.randn(3, 4, 5),), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_clamp_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_exp.py b/backends/test/suite/operators/test_exp.py
index bdae5c6a5e6..54196d81ba9 100644
--- a/backends/test/suite/operators/test_exp.py
+++ b/backends/test/suite/operators/test_exp.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -46,6 +48,7 @@ def test_exp_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(ExpModel(), (torch.randn(3, 4, 5),), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_exp_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_floor.py b/backends/test/suite/operators/test_floor.py
index fcc834afa16..bce9f0b4d34 100644
--- a/backends/test/suite/operators/test_floor.py
+++ b/backends/test/suite/operators/test_floor.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -42,6 +44,7 @@ def test_floor_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(FloorModel(), (torch.randn(3, 4, 5),), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_floor_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_floor_divide.py b/backends/test/suite/operators/test_floor_divide.py
index 87104af11dc..c14151b6181 100644
--- a/backends/test/suite/operators/test_floor_divide.py
+++ b/backends/test/suite/operators/test_floor_divide.py
@@ -6,6 +6,8 @@
 
 # pyre-unsafe
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -178,6 +180,7 @@ def test_floor_divide_values(self, flow: TestFlow) -> None:
         y = torch.tensor([-2.0]).expand_as(x).clone()
         self._test_op(model, (x, y), flow, generate_random_test_inputs=False)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_floor_divide_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
         model = FloorDivideModel()
diff --git a/backends/test/suite/operators/test_log.py b/backends/test/suite/operators/test_log.py
index 96ba8da1292..c4af1fe442b 100644
--- a/backends/test/suite/operators/test_log.py
+++ b/backends/test/suite/operators/test_log.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -46,6 +48,7 @@ def test_log_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(LogModel(), (torch.rand(3, 4, 5) + 0.01,), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_log_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
         # Tensor with infinity
diff --git a/backends/test/suite/operators/test_log10.py b/backends/test/suite/operators/test_log10.py
index 7d0e2e111d6..aeb97671f1b 100644
--- a/backends/test/suite/operators/test_log10.py
+++ b/backends/test/suite/operators/test_log10.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -46,6 +48,7 @@ def test_log10_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(Log10Model(), (torch.rand(3, 4, 5) + 0.01,), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_log10_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
         # Tensor with infinity
diff --git a/backends/test/suite/operators/test_log1p.py b/backends/test/suite/operators/test_log1p.py
index 383e3116b32..08a5c382076 100644
--- a/backends/test/suite/operators/test_log1p.py
+++ b/backends/test/suite/operators/test_log1p.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -46,6 +48,7 @@ def test_log1p_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(Log1pModel(), (torch.rand(3, 4, 5) * 2 - 0.5,), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_log1p_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
         # Tensor with infinity
diff --git a/backends/test/suite/operators/test_log2.py b/backends/test/suite/operators/test_log2.py
index ddcafaf08d2..16161d334f6 100644
--- a/backends/test/suite/operators/test_log2.py
+++ b/backends/test/suite/operators/test_log2.py
@@ -7,6 +7,8 @@
 # pyre-unsafe
 
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -46,6 +48,7 @@ def test_log2_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(Log2Model(), (torch.rand(3, 4, 5) + 0.01,), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_log2_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
         # Tensor with infinity
diff --git a/backends/test/suite/operators/test_mean.py b/backends/test/suite/operators/test_mean.py
index 746a4b16d9f..6c5c779364b 100644
--- a/backends/test/suite/operators/test_mean.py
+++ b/backends/test/suite/operators/test_mean.py
@@ -6,6 +6,7 @@
 
 # pyre-unsafe
 
+import unittest
 from typing import List, Optional, Tuple, Union
 
 import torch
@@ -229,6 +230,7 @@ def test_mean_shapes(self, flow: TestFlow) -> None:
             flow,
         )
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_mean_edge_cases(self, flow: TestFlow) -> None:
         x = torch.tensor([[1.0, float("inf"), 3.0], [4.0, 5.0, float("inf")]])
         self._test_op(
diff --git a/backends/test/suite/operators/test_median.py b/backends/test/suite/operators/test_median.py
index 93823b812ca..0b515d68efd 100644
--- a/backends/test/suite/operators/test_median.py
+++ b/backends/test/suite/operators/test_median.py
@@ -6,6 +6,7 @@
 
 # pyre-unsafe
 
+import unittest
 from typing import Optional
 
 import torch
@@ -167,6 +168,7 @@ def test_median_shapes(self, flow: TestFlow) -> None:
         # 5D tensor
         self._test_op(MedianValueOnlyModel(), (torch.randn(2, 2, 3, 4, 5),), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_median_edge_cases(self, flow: TestFlow) -> None:
         # Tensor with NaN (NaN should be propagated)
         x = torch.tensor([[1.0, float("nan"), 3.0], [4.0, 5.0, float("nan")]])
diff --git a/backends/test/suite/operators/test_neg.py b/backends/test/suite/operators/test_neg.py
index 35c9d851817..bc1adede877 100644
--- a/backends/test/suite/operators/test_neg.py
+++ b/backends/test/suite/operators/test_neg.py
@@ -6,6 +6,8 @@
 
 # pyre-unsafe
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -55,6 +57,7 @@ def test_neg_shapes(self, flow: TestFlow) -> None:
             NegModel(), (torch.randn(3, 4, 5),), flow, generate_random_test_inputs=False
         )
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_neg_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_pow.py b/backends/test/suite/operators/test_pow.py
index 334038d73d3..3082ad6ebaf 100644
--- a/backends/test/suite/operators/test_pow.py
+++ b/backends/test/suite/operators/test_pow.py
@@ -6,6 +6,8 @@
 
 # pyre-unsafe
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -127,6 +129,7 @@ def test_pow_shapes(self, flow: TestFlow) -> None:
             model, (torch.rand(3, 4, 5) + 0.1,), flow, generate_random_test_inputs=False
         )
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_pow_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_round.py b/backends/test/suite/operators/test_round.py
index ca8e6368d48..3a3577bea32 100644
--- a/backends/test/suite/operators/test_round.py
+++ b/backends/test/suite/operators/test_round.py
@@ -6,6 +6,8 @@
 
 # pyre-unsafe
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -52,6 +54,7 @@ def test_round_values(self, flow: TestFlow) -> None:
         x = torch.arange(-5, 5, 0.5)  # [-5.0, -4.5, -4.0, ..., 4.0, 4.5]
         self._test_op(RoundModel(), (x,), flow, generate_random_test_inputs=False)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_round_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
@@ -98,6 +101,7 @@ def test_round_decimals(self, flow: TestFlow) -> None:
             RoundModel(decimals=-2), (x,), flow, generate_random_test_inputs=False
         )
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_round_decimals_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases with decimal places
 
diff --git a/backends/test/suite/operators/test_rsqrt.py b/backends/test/suite/operators/test_rsqrt.py
index 175bbcdb2cc..705833194fb 100644
--- a/backends/test/suite/operators/test_rsqrt.py
+++ b/backends/test/suite/operators/test_rsqrt.py
@@ -6,6 +6,8 @@
 
 # pyre-unsafe
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -45,6 +47,7 @@ def test_rsqrt_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(RsqrtModel(), (torch.rand(3, 4, 5) + 0.01,), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_rsqrt_edge_cases(self, flow: TestFlow) -> None:
         # Tensor with infinity
         x = torch.tensor([float("inf"), 1.0, 4.0])
diff --git a/backends/test/suite/operators/test_sqrt.py b/backends/test/suite/operators/test_sqrt.py
index c3874dcb209..3d327ade6a5 100644
--- a/backends/test/suite/operators/test_sqrt.py
+++ b/backends/test/suite/operators/test_sqrt.py
@@ -6,6 +6,8 @@
 
 # pyre-unsafe
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -45,6 +47,7 @@ def test_sqrt_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(SqrtModel(), (torch.rand(3, 4, 5),), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_sqrt_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_square.py b/backends/test/suite/operators/test_square.py
index 52cd739bf9f..39ed212e426 100644
--- a/backends/test/suite/operators/test_square.py
+++ b/backends/test/suite/operators/test_square.py
@@ -6,6 +6,8 @@
 
 # pyre-unsafe
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -44,6 +46,7 @@ def test_square_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(SquareModel(), (torch.randn(3, 4, 5),), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_square_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/operators/test_trunc.py b/backends/test/suite/operators/test_trunc.py
index 1d6d18817bd..71dcbf59176 100644
--- a/backends/test/suite/operators/test_trunc.py
+++ b/backends/test/suite/operators/test_trunc.py
@@ -6,6 +6,8 @@
 
 # pyre-unsafe
 
+import unittest
+
 import torch
 from executorch.backends.test.suite.flow import TestFlow
 
@@ -44,6 +46,7 @@ def test_trunc_shapes(self, flow: TestFlow) -> None:
         # 3D tensor
         self._test_op(TruncModel(), (torch.randn(3, 4, 5) * 5,), flow)
 
+    @unittest.skip("NaN and Inf are not enforced for backends.")
     def test_trunc_edge_cases(self, flow: TestFlow) -> None:
         # Test edge cases
 
diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py
index ce8a48dcc12..09e950ab672 100644
--- a/backends/test/suite/reporting.py
+++ b/backends/test/suite/reporting.py
@@ -1,4 +1,5 @@
 import csv
+import json
 
 from collections import Counter
 from dataclasses import dataclass, field
@@ -45,6 +46,8 @@
         ]
     )
 
+CSV_FIELD_NAMES.append("Error")
+
 
 # Operators that are excluded from the counts returned by count_ops. These are used to
 # exclude operatations that are not logically relevant or delegatable to backends.
@@ -341,7 +344,9 @@ def _sum_op_counts(counter: Counter | None) -> int | None:
 
 def _serialize_params(params: dict[str, Any] | None) -> str:
     if params is not None:
-        return str(dict(sorted(params.items())))
+        # Stringify values (JSON can't encode dtypes); sort keys for stable output.
+        str_params = {k: str(v) for k, v in params.items()}
+        return json.dumps(str_params, sort_keys=True)
     else:
         return ""
 
@@ -365,6 +370,15 @@ def write_csv_header(output: TextIO):
 def write_csv_row(record: TestCaseSummary, output: TextIO):
     writer = csv.DictWriter(output, CSV_FIELD_NAMES)
 
+    # Truncate error message if it's too long, keeping first and last 200 characters
+    error_message = ""
+    if record.error is not None:
+        error_str = str(record.error)
+        if len(error_str) > 400:
+            error_message = error_str[:200] + "..." + error_str[-200:]
+        else:
+            error_message = error_str
+
     row = {
         "Test ID": record.name,
         "Test Case": record.base_name,
@@ -373,6 +387,7 @@ def write_csv_row(record: TestCaseSummary, output: TextIO):
         "Params": _serialize_params(record.params),
         "Result": record.result.to_short_str(),
         "Result Detail": record.result.to_detail_str(),
+        "Error": error_message,
         "Delegated": "True" if record.is_delegated() else "False",
         "Quantize Time (s)": (
             f"{record.quantize_time.total_seconds():.3f}"
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
index 1f84db9c730..a6d7d07bce0 100644
--- a/backends/test/suite/runner.py
+++ b/backends/test/suite/runner.py
@@ -15,6 +15,7 @@
 UNSUPPORTED_PORTABLE_OPS = {
     "aten::_embedding_bag",
     "aten::_adaptive_avg_pool2d",
+    "aten::adaptive_max_pool2d",
     "aten::median",
     "aten::median.dim",
     "aten::round.decimals",
@@ -34,6 +35,7 @@
     TestResult,
 )
 from executorch.exir import EdgeProgramManager
+from executorch.exir.dialects._ops import ops as exir_ops
 
 
 # A list of all runnable test suites and the corresponding python package.
@@ -43,6 +45,24 @@
 }
 
 
+def _graph_has_unsupported_patterns(program: torch.export.ExportedProgram) -> bool:
+    # Returns true if the model contains patterns that will fail when running on the ET
+    # portable kernel library.
+
+    # Check for 3d convolutions. All convs (1d, 2d, 3d) use the same op, so we need to look at
+    # the input meta to determine the rank.
+    for node in program.graph.nodes:
+        if (
+            node.op == "call_function"
+            and node.target == exir_ops.edge.aten.convolution.default
+        ):
+            in_rank = node.args[0].meta["val"].dim()
+            if in_rank > 4:
+                return True
+
+    return False
+
+
 def _get_test_seed(test_base_name: str) -> int:
     # Set the seed based on the test base name to give consistent inputs between backends. Add the
     # run seed to allow for reproducible results, but still allow for run-to-run variation.
@@ -162,7 +182,7 @@ def build_result(
     # Check if any undelegated ops are in the unsupported ops set.
     has_unsupported_ops = any(
         op in UNSUPPORTED_PORTABLE_OPS for op in undelegated_op_counts.keys()
-    )
+    ) or _graph_has_unsupported_patterns(edge_manager._etrecord.edge_dialect_program)
 
     # Skip the test if there are unsupported portable ops remaining.
     if has_unsupported_ops:
@@ -171,8 +191,11 @@ def build_result(
     # Only run the runtime portion if something was delegated (or the flow doesn't delegate)
     if is_delegated or not flow.is_delegated:
         try:
-            tester.to_executorch().serialize()
-            extra_stats["pte_size_bytes"] = len(tester.get_artifact())
+            tester.to_executorch()
+
+            if flow.supports_serialize:
+                tester.serialize()
+                extra_stats["pte_size_bytes"] = len(tester.get_artifact())
         except Exception as e:
             # We could introduce a result value for this, but I'm not sure it's necessary.
             # We can do this if we ever see to_executorch() or serialize() fail due a backend issue.
diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py
index 58ff76cba17..e42681fc678 100644
--- a/backends/test/suite/tests/test_reporting.py
+++ b/backends/test/suite/tests/test_reporting.py
@@ -1,3 +1,4 @@
+import json
 import unittest
 
 from csv import DictReader
@@ -102,14 +103,16 @@ def test_csv_report_simple(self):
         self.assertEqual(records[2]["Test Case"], "test2")
         self.assertEqual(records[2]["Flow"], "flow1")
         self.assertEqual(records[2]["Result"], "Pass")
-        self.assertEqual(records[2]["Params"], str({"dtype": torch.float32}))
+        self.assertEqual(records[2]["Params"], json.dumps({"dtype": "torch.float32"}))
 
         # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
         self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
         self.assertEqual(records[3]["Test Case"], "test2")
         self.assertEqual(records[3]["Flow"], "flow1")
         self.assertEqual(records[3]["Result"], "Skip")
-        self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True}))
+        self.assertEqual(
+            records[3]["Params"], json.dumps({"use_dynamic_shapes": "True"})
+        )
 
     def test_count_ops(self):
         """