From 2e47b218fc4ecdf42d5aef6d8bbde91e9a2a8e7c Mon Sep 17 00:00:00 2001 From: rkishner Date: Thu, 20 Nov 2025 20:29:52 +0200 Subject: [PATCH] separate subtests failures to give better indication on tests analysis Signed-off-by: Roni Kishner --- AUTHORS | 1 + changelog/13986.bugfix.rst | 6 +++++ src/_pytest/pytester.py | 10 +++++--- src/_pytest/subtests.py | 28 ++++++++++++++++++++++- src/_pytest/terminal.py | 39 ++++++++++++++++++++++++++++++- testing/test_subtests.py | 47 +++++++++++++++++++------------------- 6 files changed, 102 insertions(+), 29 deletions(-) create mode 100644 changelog/13986.bugfix.rst diff --git a/AUTHORS b/AUTHORS index a089ca678f7..75bf8090cf6 100644 --- a/AUTHORS +++ b/AUTHORS @@ -395,6 +395,7 @@ Roland Puntaier Romain Dorgueil Roman Bolshakov Ronny Pfannschmidt +Roni Kishner Ross Lawley Ruaridh Williamson Russel Winder diff --git a/changelog/13986.bugfix.rst b/changelog/13986.bugfix.rst new file mode 100644 index 00000000000..28ed1875e60 --- /dev/null +++ b/changelog/13986.bugfix.rst @@ -0,0 +1,6 @@ +Show subtests failures separate from normal test failures in the final test summary. + +Subtest failures are now reported separately as "subtests failed" instead of being counted as regular "failed" tests, providing clearer statistics. + +For example, a test with 3 subtests where 1 fails and 2 pass now shows: +``1 failed, 2 subtests passed, 1 subtests failed`` instead of ``2 failed, 2 subtests passed``. diff --git a/src/_pytest/pytester.py b/src/_pytest/pytester.py index 1cd5f05dd7e..6694c87fa43 100644 --- a/src/_pytest/pytester.py +++ b/src/_pytest/pytester.py @@ -510,8 +510,9 @@ def _config_for_test() -> Generator[Config]: # Regex to match the session duration string in the summary: "74.34s". rex_session_duration = re.compile(r"\d+\.\d\ds") -# Regex to match all the counts and phrases in the summary line: "34 passed, 111 skipped". -rex_outcome = re.compile(r"(\d+) (\w+)") +# Regex to match all the counts and phrases in the summary line: +# "34 passed, 111 skipped, 3 subtests passed, 1 subtests failed". +rex_outcome = re.compile(r"(\d+) ([\w\s]+?)(?=,| in|$)") @final @@ -578,7 +579,7 @@ def parse_summary_nouns(cls, lines) -> dict[str, int]: for line in reversed(lines): if rex_session_duration.search(line): outcomes = rex_outcome.findall(line) - ret = {noun: int(count) for (count, noun) in outcomes} + ret = {noun.strip(): int(count) for (count, noun) in outcomes} break else: raise ValueError("Pytest terminal summary report not found") @@ -586,6 +587,9 @@ def parse_summary_nouns(cls, lines) -> dict[str, int]: to_plural = { "warning": "warnings", "error": "errors", + "subtest failed": "subtests failed", + "subtest passed": "subtests passed", + "subtest skipped": "subtests skipped", } return {to_plural.get(k, k): v for k, v in ret.items()} diff --git a/src/_pytest/subtests.py b/src/_pytest/subtests.py index a96b11f1fe4..45cfd8fa057 100644 --- a/src/_pytest/subtests.py +++ b/src/_pytest/subtests.py @@ -220,6 +220,30 @@ def __enter__(self) -> None: capturing_logs(self.request) ) + def _log_parent_test_report( + self, stop: float, duration: float, sub_report: SubtestReport + ) -> None: + """Log the parent test report before propagating Exit exception. + + This ensures the parent test is marked as failed if it contains + failed subtests, so both the parent test and subtest failures + are counted in the summary. 
+ """ + parent_call_info = CallInfo[None]( + None, + None, + start=self._start, + stop=stop, + duration=duration, + when="call", + _ispytest=True, + ) + parent_report = self.ihook.pytest_runtest_makereport( + item=self.request.node, call=parent_call_info + ) + with self.suspend_capture_ctx(): + self.ihook.pytest_runtest_logreport(report=parent_report) + def __exit__( self, exc_type: type[BaseException] | None, @@ -271,6 +295,8 @@ def __exit__( if exc_val is not None: if isinstance(exc_val, get_reraise_exceptions(self.config)): + if sub_report.failed: + self._log_parent_test_report(stop, duration, sub_report) return False if self.request.session.shouldfail: return False @@ -387,7 +413,7 @@ def pytest_report_teststatus( return category, short, f"{status}{description}" if report.failed: - return outcome, "u", f"SUBFAILED{description}" + return "subtests failed", "u", f"SUBFAILED{description}" else: if report.passed: if quiet: diff --git a/src/_pytest/terminal.py b/src/_pytest/terminal.py index e66e4f48dd6..7b45d770abf 100644 --- a/src/_pytest/terminal.py +++ b/src/_pytest/terminal.py @@ -8,6 +8,7 @@ import argparse from collections import Counter +from collections import defaultdict from collections.abc import Callable from collections.abc import Generator from collections.abc import Mapping @@ -1167,6 +1168,35 @@ def summary_failures(self) -> None: style = self.config.option.tbstyle self.summary_failures_combined("failed", "FAILURES", style=style) + def _add_subtests_to_failed_reports( + self, failed_reports: list[BaseReport] + ) -> list[BaseReport]: + """Combine failed reports with subtest failed reports, ordering subtests before main tests. + + For each test nodeid, subtest failures are shown before the main test failure. + """ + subtest_failed_reports = self.getreports("subtests failed") + subtest_reports_by_nodeid: dict[str, list[BaseReport]] = defaultdict(list) + for rep in subtest_failed_reports: + subtest_reports_by_nodeid[rep.nodeid].append(rep) + + ordered_reports: list[BaseReport] = [] + seen_nodeids: set[str] = set() + + for rep in failed_reports: + nodeid = rep.nodeid + if nodeid not in seen_nodeids: + seen_nodeids.add(nodeid) + if nodeid in subtest_reports_by_nodeid: + ordered_reports.extend(subtest_reports_by_nodeid[nodeid]) + ordered_reports.append(rep) + + for nodeid, subtests in subtest_reports_by_nodeid.items(): + if nodeid not in seen_nodeids: + ordered_reports.extend(subtests) + + return ordered_reports + def summary_xfailures(self) -> None: show_tb = self.config.option.xfail_tb style = self.config.option.tbstyle if show_tb else "no" @@ -1183,6 +1213,10 @@ def summary_failures_combined( if style != "no": if not needed_opt or self.hasopt(needed_opt): reports: list[BaseReport] = self.getreports(which_reports) + + if which_reports == "failed": + reports = self._add_subtests_to_failed_reports(reports) + if not reports: return self.write_sep("=", sep_title) @@ -1272,6 +1306,9 @@ def show_simple(lines: list[str], *, stat: str) -> None: if not failed: return config = self.config + # For failed reports, also include subtests failed reports + if stat == "failed": + failed = self._add_subtests_to_failed_reports(failed) for rep in failed: color = _color_for_type.get(stat, _color_for_type_default) line = _get_line_with_reprcrash_message( @@ -1380,7 +1417,7 @@ def _get_main_color(self) -> tuple[str, list[str]]: def _determine_main_color(self, unknown_type_seen: bool) -> str: stats = self.stats - if "failed" in stats or "error" in stats: + if "failed" in stats or "error" in stats or 
"subtests failed" in stats: main_color = "red" elif "warnings" in stats or "xpassed" in stats or unknown_type_seen: main_color = "yellow" diff --git a/testing/test_subtests.py b/testing/test_subtests.py index 6849df53622..674402c48bc 100644 --- a/testing/test_subtests.py +++ b/testing/test_subtests.py @@ -55,7 +55,7 @@ def test_zaz(subtests): [ "test_*.py uFuF. * [[]100%[]]", *summary_lines, - "* 4 failed, 1 passed in *", + "* 2 failed, 1 passed, 2 subtests failed in *", ] ) @@ -69,7 +69,7 @@ def test_zaz(subtests): "test_*.py::test_zaz SUBPASSED[[]zaz subtest[]] * [[]100%[]]", "test_*.py::test_zaz PASSED * [[]100%[]]", *summary_lines, - "* 4 failed, 1 passed, 1 subtests passed in *", + "* 2 failed, 1 passed, 1 subtests passed, 2 subtests failed in *", ] ) pytester.makeini( @@ -87,7 +87,7 @@ def test_zaz(subtests): "test_*.py::test_bar FAILED * [[] 66%[]]", "test_*.py::test_zaz PASSED * [[]100%[]]", *summary_lines, - "* 4 failed, 1 passed in *", + "* 2 failed, 2 subtests failed, 1 passed in *", ] ) result.stdout.no_fnmatch_line("test_*.py::test_zaz SUBPASSED[[]zaz subtest[]]*") @@ -307,7 +307,7 @@ def test_foo(subtests, x): "*.py::test_foo[[]1[]] SUBFAILED[[]custom[]] (i=1) *[[]100%[]]", "*.py::test_foo[[]1[]] FAILED *[[]100%[]]", "contains 1 failed subtest", - "* 4 failed, 4 subtests passed in *", + "* 2 failed, 4 subtests passed, 2 subtests failed in *", ] ) @@ -325,7 +325,7 @@ def test_foo(subtests, x): "*.py::test_foo[[]1[]] SUBFAILED[[]custom[]] (i=1) *[[]100%[]]", "*.py::test_foo[[]1[]] FAILED *[[]100%[]]", "contains 1 failed subtest", - "* 4 failed in *", + "* 2 failed, 2 subtests failed in *", ] ) @@ -344,7 +344,7 @@ def test_foo(subtests): result = pytester.runpytest("-v") result.stdout.fnmatch_lines( [ - "* 2 failed, 2 subtests passed in *", + "* 1 failed, 2 subtests passed, 1 subtests failed in *", ] ) @@ -365,7 +365,7 @@ def test_foo(subtests): result.stdout.fnmatch_lines( [ "*AssertionError: top-level failure", - "* 2 failed, 2 subtests passed in *", + "* 1 failed, 2 subtests passed, 1 subtests failed in *", ] ) @@ -386,14 +386,14 @@ def test_foo(subtests): result = pytester.runpytest("-v") result.stdout.fnmatch_lines( [ - "* 2 failed, 2 subtests passed in *", + "* 1 failed, 2 subtests passed, 1 subtests failed in *", ] ) result = pytester.runpytest("-v", flag) result.stdout.fnmatch_lines( [ - "* 2 failed, 2 subtests passed in *", + "* 1 failed, 2 subtests passed, 1 subtests failed in *", ] ) @@ -427,7 +427,7 @@ def test_zaz(self): result = pytester.runpytest() result.stdout.fnmatch_lines( [ - "* 3 failed, 2 passed in *", + "* 1 failed, 2 passed, 2 subtests failed in *", ] ) @@ -578,9 +578,7 @@ def test_foo(self): result.stdout.fnmatch_lines( [ "*.py u. 
* [[]100%[]]", - "*=== short test summary info ===*", - "SUBFAILED[[]subtest 2[]] *.py::T::test_foo - AssertionError: fail subtest 2", - "* 1 failed, 1 passed in *", + "* 1 passed, 1 subtests failed in *", ] ) @@ -590,9 +588,9 @@ def test_foo(self): "*.py::T::test_foo SUBSKIPPED[[]subtest 1[]] (skip subtest 1) * [[]100%[]]", "*.py::T::test_foo SUBFAILED[[]subtest 2[]] * [[]100%[]]", "*.py::T::test_foo PASSED * [[]100%[]]", + "*=== short test summary info ===*", "SUBSKIPPED[[]subtest 1[]] [[]1[]] *.py:*: skip subtest 1", - "SUBFAILED[[]subtest 2[]] *.py::T::test_foo - AssertionError: fail subtest 2", - "* 1 failed, 1 passed, 1 skipped in *", + "* 1 passed, 1 skipped, 1 subtests failed in *", ] ) @@ -607,9 +605,7 @@ def test_foo(self): [ "*.py::T::test_foo SUBFAILED[[]subtest 2[]] * [[]100%[]]", "*.py::T::test_foo PASSED * [[]100%[]]", - "*=== short test summary info ===*", - r"SUBFAILED[[]subtest 2[]] *.py::T::test_foo - AssertionError: fail subtest 2", - r"* 1 failed, 1 passed in *", + "* 1 passed, 1 subtests failed in *", ] ) result.stdout.no_fnmatch_line( @@ -814,7 +810,7 @@ def test(subtests): result = pytester.runpytest("-p no:logging") result.stdout.fnmatch_lines( [ - "*2 failed in*", + "*1 failed, 1 subtests failed in*", ] ) result.stdout.no_fnmatch_line("*root:test_no_logging.py*log line*") @@ -899,12 +895,15 @@ def test_foo(subtests): """ ) result = pytester.runpytest("--exitfirst") - assert result.parseoutcomes()["failed"] == 2 + outcomes = result.parseoutcomes() + assert outcomes["failed"] == 1 + assert outcomes["subtests failed"] == 1 result.stdout.fnmatch_lines( [ - "SUBFAILED*[[]sub1[]] *.py::test_foo - assert False*", + "*=== short test summary info ===*", "FAILED *.py::test_foo - assert False", - "* stopping after 2 failures*", + "*stopping after 2 failures*", + "*1 failed, 1 subtests failed*", ], consecutive=True, ) @@ -926,7 +925,7 @@ def test2(): pass result.stdout.fnmatch_lines( [ "* _pytest.outcomes.Exit *", - "* 1 failed in *", + "*1 failed, 1 subtests failed in*", ] ) @@ -952,9 +951,9 @@ def test(subtests): [ "SUBFAILED[b] test_nested.py::test - AssertionError: b failed", "SUBFAILED[a] test_nested.py::test - AssertionError: a failed", - "* 3 failed in *", ] ) + result.stdout.fnmatch_lines(["* 3 failed in *"]) def test_serialization() -> None:
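
Note (illustration only, not part of the patch): a minimal sketch of a test module matching the scenario described in changelog/13986.bugfix.rst. The module name and loop values are assumptions chosen to mirror the changelog example.

    # test_example.py -- hypothetical module used only to illustrate the new summary.
    # With this change, the run is expected to end with:
    #   1 failed, 2 subtests passed, 1 subtests failed
    # whereas the same run previously counted as:
    #   2 failed, 2 subtests passed
    def test_example(subtests):
        for i in range(3):
            with subtests.test(msg="custom", i=i):
                # The i == 1 iteration fails; it is now counted under
                # "subtests failed", while the parent test is still
                # reported as failed because it contains a failed subtest.
                assert i != 1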
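
Note (illustration only, not part of the patch): a quick sketch of how the updated rex_outcome pattern from src/_pytest/pytester.py tokenizes a summary line that contains the new multi-word nouns; the sample line below is made up for the sketch.

    import re

    # Same pattern as in the patch: lazily capture the noun up to a comma,
    # the " in <duration>" suffix, or end of line.
    rex_outcome = re.compile(r"(\d+) ([\w\s]+?)(?=,| in|$)")
    line = "1 failed, 2 subtests passed, 1 subtests failed in 0.12s"
    print(rex_outcome.findall(line))
    # [('1', 'failed'), ('2', 'subtests passed'), ('1', 'subtests failed')]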