From 0022cb7baf2c9a241c7133595d76ff2cafb4963b Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Fri, 22 May 2026 18:36:34 -0700 Subject: [PATCH 1/6] wip commit --- .../pytest_plugin/_step_status_capture.py | 65 ++ .../pytest_plugin/step_status_states.md | 126 ++++ .../_tests/pytest_plugin/test_pass_fail.py | 621 ++++++++++++++++++ python/lib/sift_client/pytest_plugin.py | 158 ++++- .../util/test_results/context_manager.py | 51 +- 5 files changed, 1001 insertions(+), 20 deletions(-) create mode 100644 python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py create mode 100644 python/lib/sift_client/_tests/pytest_plugin/step_status_states.md create mode 100644 python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py diff --git a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py new file mode 100644 index 000000000..6838fdcc5 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py @@ -0,0 +1,65 @@ +"""Shared state for the step-status characterization suite. + +The outer test in ``test_step_status_states.py`` runs inner pytest sessions +via ``pytester``. The inner session installs a fake ``sift_client`` (see +``_INNER_CONFTEST_SRC`` in that file) which records every step status +write into this module's ``CAPTURED_STEPS`` dict so the outer test can +assert on what the plugin produced. + +This lives in its own module (rather than inside the test file) because +the inner ``conftest.py`` runs in a fresh pytester tmp dir and needs an +importable, package-reachable handle to the same dict object. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from sift_client.sift_types.test_report import TestStatus + + +@dataclass +class CapturedStep: + step_id: str + name: str + step_path: str + parent_step_id: str | None + statuses: list[TestStatus] = field(default_factory=list) + + +CAPTURED_STEPS: dict[str, CapturedStep] = {} + + +def reset() -> None: + CAPTURED_STEPS.clear() + + +def steps_by_name(name: str) -> list[CapturedStep]: + return [s for s in CAPTURED_STEPS.values() if s.name == name] + + +def test_step(name: str) -> CapturedStep | None: + """The step the autouse ``step`` fixture creates for the test function. + + There can be a deeper step with the same name when the ``makereport`` + hook also records one (e.g. ``pytest.skip()`` inside the test body, or + an ``xfail`` mark). The autouse step is the shallowest of those, so + pick by step_path depth. + """ + matches = [s for s in CAPTURED_STEPS.values() if s.name == name] + if not matches: + return None + return min(matches, key=lambda s: s.step_path.count(".")) + + +def child_steps(parent: CapturedStep) -> list[CapturedStep]: + return [s for s in CAPTURED_STEPS.values() if s.parent_step_id == parent.step_id] + + +def final_status(name: str) -> TestStatus | None: + step = test_step(name) + if step is None or not step.statuses: + return None + return step.statuses[-1] diff --git a/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md b/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md new file mode 100644 index 000000000..83b64783d --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md @@ -0,0 +1,126 @@ +# Pytest-plugin step-status: observed vs. target + +Companion document to `test_step_status_states.py`. Each row corresponds to +one scenario in that suite. The **target** column is the contract the suite +asserts (sourced from +[`docs/guides/pytest_plugin/pass_fail_behavior.md`](../../../../docs/guides/pytest_plugin/pass_fail_behavior.md)); +the **observed today** column records what the plugin actually produces +right now. Every row should be marked `OK`; a `Gap` indicates the plugin has +regressed against the contract. + +`TestStatus` values referenced below come from +`sift_client.sift_types.test_report.TestStatus`: `PASSED`, `FAILED`, `ERROR`, +`SKIPPED`, `IN_PROGRESS`. The targets below map every scenario onto these +existing statuses. An `ABORTED` status for hard process exits (`SystemExit`, +`KeyboardInterrupt`, signals) is a planned future addition; until it lands +those cases baseline against `ERROR` or `IN_PROGRESS`. The user-facing +contract these targets describe is documented in +[`docs/guides/pytest_plugin/pass_fail_behavior.md`](../../../../docs/guides/pytest_plugin/pass_fail_behavior.md). + +## Case ID scheme + +Each scenario has a stable case ID of the form `PREFIX-NN`, where the +prefix names its section. Tests in `test_step_status_states.py` reference +their case ID in a leading comment so a failing test can be traced back to +this table without rereading the scenario: + +| Prefix | Section | +| ------- | ---------------------------------------- | +| `CALL` | Call-phase exit paths | +| `SKIP` | Skip paths | +| `XFAIL` | xfail / xpass | +| `PHASE` | Setup / teardown phases | +| `COLL` | Collection / fixture-resolution failures | +| `API` | Plugin-API exit paths | + +IDs are stable: a new scenario in a section takes the next free number for +that prefix; numbers are never reused or shifted when other sections grow. + +## Call-phase exit paths + +| Case | Scenario | Trigger | Observed today | Target | Status | +| --------- | --------------------------------------- | --------------------------------------------- | --------------------------- | ------------------------------------------ | ------ | +| `CALL-01` | Test passes | function body returns cleanly | `PASSED` | `PASSED` | OK | +| `CALL-02` | Assert failure in call phase | `assert 1 == 2` | `FAILED` | `FAILED` | OK | +| `CALL-03` | Generic exception in call phase | `raise ValueError("boom")` | `ERROR` | `ERROR` | OK | +| `CALL-04` | `pytest.fail("...")` from body | `pytest.fail("intentional failure")` | `FAILED` | `FAILED` | OK | +| `CALL-05` | `SystemExit` from the test body | `sys.exit(1)` | `ERROR` | `ERROR` (baseline; `ABORTED` planned later) | OK | +| `CALL-06` | `KeyboardInterrupt` in body | `raise KeyboardInterrupt` | `IN_PROGRESS` (session aborts before the plugin sees the interrupt) | `ERROR` when the plugin sees the interrupt; a session-aborting interrupt leaves the step in `IN_PROGRESS` | OK | + +## Skip paths + +| Case | Scenario | Trigger | Observed today | Target | Status | +| --------- | --------------------------------------- | --------------------------------------------- | --------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | +| `SKIP-01` | Collection-time skip | `@pytest.mark.skip(reason=...)` | `SKIPPED` (only the makereport hook records a step; no autouse step ran) | `SKIPPED` | OK | +| `SKIP-02` | Conditional collection-time skip | `@pytest.mark.skipif(True, reason=...)` | `SKIPPED` (same route as `@pytest.mark.skip`) | `SKIPPED` | OK | +| `SKIP-03` | Runtime skip in body | `pytest.skip("...")` | Outer step `SKIPPED`; no duplicate nested step | Outer step `SKIPPED`; no duplicate nested step | OK | +| `SKIP-04` | Skip raised inside a fixture | `@pytest.fixture` calls `pytest.skip("...")` | Outer step `SKIPPED` (setup-phase skip); no duplicate nested step | Outer step `SKIPPED` (setup-phase skip); no duplicate nested step | OK | + +## xfail / xpass + +| Case | Scenario | Trigger | Observed today | Target | Status | +| ---------- | ----------------------------------------- | ------------------------------------------------------ | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------- | ------ | +| `XFAIL-01` | xfail-marked test that fails | `@pytest.mark.xfail` + `assert 1 == 2` | Outer step `PASSED` (test fulfilled the xfail expectation); no duplicate nested step | Outer step `PASSED` (test fulfilled the xfail expectation); no duplicate nested step | OK | +| `XFAIL-02` | Strict xfail that unexpectedly passes | `@pytest.mark.xfail(strict=True)` + `assert True` | Outer step `FAILED` (mark no longer matches reality — either the bug was fixed or the test stopped testing what it claimed) | Outer step `FAILED` (mark no longer matches reality — either the bug was fixed or the test stopped testing what it claimed) | OK | +| `XFAIL-03` | Non-strict xfail that unexpectedly passes | `@pytest.mark.xfail()` + `assert True` | Outer step `PASSED` | Outer step `PASSED` (`strict=False` doesn't insist on the failure) | OK | +| `XFAIL-04` | `xfail(raises=...)` with wrong exception | `@pytest.mark.xfail(raises=ValueError)` + `raise KeyError` | `FAILED` (the `raises=` mismatch is a real test failure) | `FAILED` (the `raises=` mismatch is a real test failure) | OK | +| `XFAIL-05` | `xfail(run=False)` | `@pytest.mark.xfail(run=False)` (body never executed) | `SKIPPED` (the test never ran) | `SKIPPED` (the test never ran) | OK | + +## Setup / teardown phases + +| Case | Scenario | Trigger | Observed today | Target | Status | +| ---------- | ------------------------------------------ | -------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------- | ------ | +| `PHASE-01` | Setup-phase fixture failure (RuntimeError) | `@pytest.fixture` raises before `yield`; test body never runs | Outer step `ERROR`; the plugin reads the setup-phase report and maps `failed` → `ERROR` | `ERROR` (a `phase=setup` annotation is a planned follow-up) | OK | +| `PHASE-02` | Teardown-phase fixture failure | `@pytest.fixture` raises after `yield`; test body passed | Outer step `FAILED`; after teardown the plugin upgrades a passed step when the teardown report shows `failed` | `FAILED` (a `phase=teardown` annotation is a planned follow-up) | OK | +| `PHASE-03` | Call-phase fail **plus** teardown-phase fail | `assert 1 == 2` in body AND `@pytest.fixture` raises after `yield` | Outer step `FAILED` (the call-phase failure dominates); the teardown error is not yet surfaced separately | `FAILED`; surfacing the teardown error alongside is a planned follow-up | OK | + +## Collection / fixture-resolution failures + +| Case | Scenario | Trigger | Observed today | Target | Status | +| --------- | --------------------------------------- | --------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------- | ------ | +| `COLL-01` | Missing fixture | `def test_x(nonexistent_fixture):` | Outer step `ERROR` — the missing fixture surfaces as a setup-phase failure, which the plugin now maps to `ERROR` | `ERROR` (a `phase=setup` annotation is a planned follow-up) | OK | + +## Plugin-API exit paths (in-test mutations) + +| Case | Scenario | Trigger | Observed today | Target | Status | +| -------- | --------------------------------------- | ---------------------------------------------------------------------- | ----------------------------------------------------------- | -------- | ------ | +| `API-01` | Manual status override | `step.current_step.update({"status": TestStatus.FAILED})` | `FAILED` | `FAILED` | OK | +| `API-02` | `report_outcome(result=False)` | `step.report_outcome("the_check", False, "did not match")` | `FAILED` | `FAILED` | OK | +| `API-03` | `measure(...)` out-of-bounds | `step.measure(name="m", value=10.0, bounds={"min": 0.0, "max": 5.0})` | `FAILED` | `FAILED` | OK | +| `API-04` | Failed measurement on a substep | `with step.substep(...) as s: s.measure(... out-of-bounds)` | `FAILED` (propagates from substep to parent) | `FAILED` | OK | +| `API-05` | Manually-skipped substep | `with step.substep(...) as s: s.current_step.update({"status": SKIPPED})` | Parent step `PASSED` (skip does not propagate as a failure) | `PASSED` | OK | + +## Out of scope for this characterization run + +- **Timeout** — needs `pytest-timeout` or a manual signal harness. Add as a + follow-up once the audit picks a timeout strategy. +- **Signal (SIGKILL / SIGTERM)** — cannot be caught from inside the process; + needs a subprocess-level harness. +- **`pytest.exit("...")`** — niche; the "aborts subsequent tests" behavior + is hard to characterize cleanly because each `pytester` invocation is its + own session. Document the expectation alongside `SystemExit`. +- **`os._exit()`** — bypasses Python cleanup entirely; can't be tested + in-process because it would kill the outer pytest run. Document as a + guaranteed data-loss case alongside `SystemExit` / `SIGKILL`. +- **Parametrize-level marks** (`pytest.param(..., marks=pytest.mark.xfail / skip)`) + — routes through a different selection path but produces the same + `report.outcome`, so behavior should match the function-level marks + already covered above. Add only if the plugin's eventual phase-aware + handler diverges between the two. +- **Import error / syntax error / `conftest.py` error** — these fail + collection entirely; no `item` is produced and no plugin hook fires. + Document explicitly that no Sift step is recorded. +- **No-data / indeterminate** — tracked separately as part of the sibling + status-semantics work. + +## How to refresh this table + +Run the suite locally: + +``` +pytest lib/sift_client/_tests/util/test_step_status_states.py -v +``` + +Every row should be `OK`. If a row regresses to `Gap`, the matching test +fails; update the **Observed today** column here to describe the +regression and flip the row's status to **Gap** until the plugin is fixed. diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py new file mode 100644 index 000000000..c5027fb41 --- /dev/null +++ b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py @@ -0,0 +1,621 @@ +"""Contract suite: maps each pytest exit path to the ``TestStatus`` the +Sift pytest plugin is required to record on the outer step. + +Each scenario writes a tiny inner test file and runs it through pytester +with a fake ``sift_client`` injected via a generated conftest. The fake +records every step status write into ``_step_status_capture.CAPTURED_STEPS`` +so this outer test can assert on what the plugin produced. + +Assertions encode the contract from +``docs/guides/pytest_plugin/pass_fail_behavior.md``. Tests for scenarios the +plugin does not yet handle correctly are expected to **fail today** — they +are the punch list. ``lib/sift_client/_tests/pytest_plugin/step_status_states.md`` +tracks each scenario's observed-today behavior next to the target so the +remaining gaps are visible without running the suite. +""" + +from __future__ import annotations + +import textwrap + +import pytest + +from sift_client._tests.pytest_plugin import _step_status_capture as capture +from sift_client.sift_types.test_report import TestStatus + +pytest_plugins = ["pytester"] + + +_INNER_CONFTEST_SRC = ''' +"""Auto-generated conftest for the step-status characterization suite. + +Installs the Sift pytest plugin and a fake ``sift_client`` that records +every step status write into the outer test's CAPTURED_STEPS dict. +""" + +from __future__ import annotations + +import uuid + +import pytest + +# Bring the Sift fixtures + the makereport hook into this inner session. +pytest_plugins = ["sift_client.pytest_plugin"] + +from sift_client._tests.pytest_plugin._step_status_capture import CAPTURED_STEPS, CapturedStep +from sift_client.sift_types.test_report import ( + TestMeasurement, + TestReport, + TestStep, +) + + + +class _FakeTestResults: + def __init__(self, client): + self._client = client + + def create(self, test_report, log_file=None): + report = TestReport( + id_=str(uuid.uuid4()), + status=test_report.status, + name=test_report.name, + test_system_name=test_report.test_system_name, + test_case=test_report.test_case, + start_time=test_report.start_time, + end_time=test_report.end_time, + metadata=test_report.metadata or {}, + is_archived=False, + ) + report._apply_client_to_instance(self._client) + return report + + def update(self, test_report, update, log_file=None): + return test_report + + def create_step(self, test_step, log_file=None): + step_id = str(uuid.uuid4()) + CAPTURED_STEPS[step_id] = CapturedStep( + step_id=step_id, + name=test_step.name, + step_path=test_step.step_path, + parent_step_id=test_step.parent_step_id, + statuses=[test_step.status], + ) + step = TestStep( + id_=step_id, + test_report_id=test_step.test_report_id, + parent_step_id=test_step.parent_step_id, + name=test_step.name, + description=test_step.description, + step_type=test_step.step_type, + step_path=test_step.step_path, + status=test_step.status, + start_time=test_step.start_time, + end_time=test_step.end_time, + error_info=test_step.error_info, + ) + step._apply_client_to_instance(self._client) + return step + + def update_step(self, test_step, update, log_file=None): + new_status = ( + update.get("status") if isinstance(update, dict) else update.status + ) + if test_step.id_ in CAPTURED_STEPS and new_status is not None: + CAPTURED_STEPS[test_step.id_].statuses.append(new_status) + merged_status = new_status if new_status is not None else test_step.status + updated = TestStep( + id_=test_step.id_, + test_report_id=test_step.test_report_id, + parent_step_id=test_step.parent_step_id, + name=test_step.name, + description=test_step.description, + step_type=test_step.step_type, + step_path=test_step.step_path, + status=merged_status, + start_time=test_step.start_time, + end_time=test_step.end_time, + error_info=test_step.error_info, + ) + updated._apply_client_to_instance(self._client) + return updated + + def create_measurement(self, test_measurement, update_step=False, log_file=None): + measurement = TestMeasurement( + id_=str(uuid.uuid4()), + measurement_type=test_measurement.measurement_type, + name=test_measurement.name, + test_step_id=test_measurement.test_step_id, + numeric_value=test_measurement.numeric_value, + string_value=test_measurement.string_value, + boolean_value=test_measurement.boolean_value, + unit=test_measurement.unit, + numeric_bounds=test_measurement.numeric_bounds, + string_expected_value=test_measurement.string_expected_value, + passed=test_measurement.passed, + timestamp=test_measurement.timestamp, + ) + measurement._apply_client_to_instance(self._client) + return measurement + + +class _FakePing: + def ping(self): + return None + + +class _FakeSiftClient: + def __init__(self): + self.test_results = _FakeTestResults(self) + self.ping = _FakePing() + + +@pytest.fixture(scope="session") +def sift_client(): + return _FakeSiftClient() +''' + + +_RUN_ARGS = ( + "--sift-test-results-log-file=false", + "--no-sift-test-results-git-metadata", +) + + +@pytest.fixture +def inner(pytester): + """Reset the capture state and install the inner conftest. Returns ``pytester``.""" + capture.reset() + pytester.makeconftest(_INNER_CONFTEST_SRC) + return pytester + + +def _run(pytester, body: str) -> None: + pytester.makepyfile(textwrap.dedent(body)) + pytester.runpytest_inprocess(*_RUN_ARGS) + + +# --------------------------------------------------------------------------- +# Call-phase exit paths +# --------------------------------------------------------------------------- + + +def test_pass_maps_to_passed(inner): + # Case: CALL-01 + _run( + inner, + """ + def test_x(): + assert True + """, + ) + assert capture.final_status("test_x") == TestStatus.PASSED + + +def test_assert_failure_maps_to_failed(inner): + # Case: CALL-02 + _run( + inner, + """ + def test_x(): + assert 1 == 2 + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_generic_exception_maps_to_error(inner): + # Case: CALL-03 + _run( + inner, + """ + def test_x(): + raise ValueError("boom") + """, + ) + assert capture.final_status("test_x") == TestStatus.ERROR + + +def test_system_exit_maps_to_error(inner): + # Case: CALL-05 + _run( + inner, + """ + import sys + def test_x(): + sys.exit(1) + """, + ) + assert capture.final_status("test_x") == TestStatus.ERROR + + +def test_pytest_fail_maps_to_failed(inner): + # Case: CALL-04 + _run( + inner, + """ + import pytest + def test_x(): + pytest.fail("intentional failure") + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_keyboard_interrupt_leaves_step_in_progress(inner): + # Case: CALL-06 + # KeyboardInterrupt aborts the session before the call-phase makereport + # fires; the plugin can't observe the interrupt. The contract is that + # the step is left in IN_PROGRESS rather than being silently resolved + # to PASSED — a session-aborting interrupt should not look like a clean + # pass in the report. + try: + _run( + inner, + """ + def test_x(): + raise KeyboardInterrupt + """, + ) + except KeyboardInterrupt: + pass + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.IN_PROGRESS + + +# --------------------------------------------------------------------------- +# Skip paths +# --------------------------------------------------------------------------- + + +def test_pytest_skip_in_body_maps_to_skipped(inner): + # Case: SKIP-03 + _run( + inner, + """ + import pytest + def test_x(): + pytest.skip("not today") + """, + ) + # Runtime skip in the body resolves the outer step to SKIPPED. The + # makereport hook must not create a duplicate nested step. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.SKIPPED + duplicates = [s for s in capture.steps_by_name("test_x") if s is not outer] + assert not duplicates, f"expected no duplicate nested step; got {len(duplicates)}" + + +def test_pytest_mark_skip_records_skipped(inner): + # Case: SKIP-01 + _run( + inner, + """ + import pytest + @pytest.mark.skip(reason="collection-time skip") + def test_x(): + assert False + """, + ) + # Collection-time skip: the autouse step fixture never runs. Only the + # makereport hook creates a step, with status SKIPPED. + assert capture.final_status("test_x") == TestStatus.SKIPPED + + +def test_pytest_mark_skipif_records_skipped(inner): + # Case: SKIP-02 + _run( + inner, + """ + import pytest + @pytest.mark.skipif(True, reason="conditional skip") + def test_x(): + assert False + """, + ) + # `skipif` with a truthy condition follows the same path as + # `@pytest.mark.skip`; only the makereport hook records a step. + assert capture.final_status("test_x") == TestStatus.SKIPPED + + +def test_skip_inside_fixture_setup(inner): + # Case: SKIP-04 + _run( + inner, + """ + import pytest + + @pytest.fixture + def skipping_fixture(): + pytest.skip("environment not ready") + + def test_x(skipping_fixture): + assert True + """, + ) + # A setup-phase skip resolves the outer step to SKIPPED. The makereport + # hook must not create a duplicate nested step. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.SKIPPED + duplicates = [s for s in capture.steps_by_name("test_x") if s is not outer] + assert not duplicates, f"expected no duplicate nested step; got {len(duplicates)}" + + +# --------------------------------------------------------------------------- +# xfail / xpass +# --------------------------------------------------------------------------- + + +def test_xfail_marked_test_that_fails(inner): + # Case: XFAIL-01 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(reason="known issue") + def test_x(): + assert 1 == 2 + """, + ) + # xfail + expected failure fulfills the contract; outer step resolves to + # PASSED. No duplicate nested step from the makereport hook. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.PASSED + duplicates = [s for s in capture.steps_by_name("test_x") if s is not outer] + assert not duplicates, f"expected no duplicate nested step; got {len(duplicates)}" + + +def test_xfail_strict_unexpected_pass(inner): + # Case: XFAIL-02 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(strict=True, reason="should fail") + def test_x(): + assert True + """, + ) + # strict xfail that passes must surface as FAILED: either the bug was + # fixed (remove the mark) or the test stopped exercising what it claimed. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +def test_xfail_non_strict_unexpected_pass(inner): + # Case: XFAIL-03 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(reason="might pass sometimes") + def test_x(): + assert True + """, + ) + # Non-strict xfail does not insist on the failure, so a passing run is + # PASSED. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.PASSED + + +def test_xfail_raises_mismatch(inner): + # Case: XFAIL-04 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(raises=ValueError, reason="expected ValueError") + def test_x(): + raise KeyError("wrong exception") + """, + ) + # `raises=` mismatch is a real test failure — the contract required a + # specific exception type and a different one was thrown. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +def test_xfail_run_false(inner): + # Case: XFAIL-05 + _run( + inner, + """ + import pytest + @pytest.mark.xfail(run=False, reason="never run") + def test_x(): + assert False + """, + ) + # The test never ran; outer step is SKIPPED. + assert capture.final_status("test_x") == TestStatus.SKIPPED + + +# --------------------------------------------------------------------------- +# Setup-phase / teardown-phase fixture failures +# --------------------------------------------------------------------------- + + +def test_setup_phase_fixture_failure(inner): + # Case: PHASE-01 + _run( + inner, + """ + import pytest + + @pytest.fixture + def bad_setup(): + raise RuntimeError("setup boom") + + def test_x(bad_setup): + assert True + """, + ) + # A fixture that raises before `yield` fails the setup phase. The outer + # step must surface this as ERROR; the test body never executed and a + # silently green step would hide the failure. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.ERROR + + +def test_teardown_phase_fixture_failure(inner): + # Case: PHASE-02 + _run( + inner, + """ + import pytest + + @pytest.fixture + def bad_teardown(): + yield + raise RuntimeError("teardown boom") + + def test_x(bad_teardown): + assert True + """, + ) + # A fixture that raises after `yield` fails the teardown phase. The + # outer step's status reflects the teardown failure as FAILED rather + # than the call-phase pass. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +def test_call_fail_plus_teardown_fail(inner): + # Case: PHASE-03 + _run( + inner, + """ + import pytest + + @pytest.fixture + def bad_teardown(): + yield + raise RuntimeError("teardown boom") + + def test_x(bad_teardown): + assert 1 == 2 + """, + ) + # Call-phase failure dominates the outer step status; the contract also + # requires the teardown error to be surfaced somewhere on the step + # (mechanism TBD — see pass_fail_behavior.md). This test asserts the + # status today; tighten once a surfacing mechanism is chosen. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +# --------------------------------------------------------------------------- +# Collection-phase failures +# --------------------------------------------------------------------------- + + +def test_missing_fixture_maps_to_error(inner): + # Case: COLL-01 + _run( + inner, + """ + def test_x(nonexistent_fixture): + assert True + """, + ) + # An unresolved fixture is a setup-phase failure. The outer step + # surfaces as ERROR rather than a misleading green pass for a test + # that never executed. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.ERROR + + +# --------------------------------------------------------------------------- +# Plugin-API exit paths (in-test mutations) +# --------------------------------------------------------------------------- + + +def test_manual_status_update_to_failed(inner): + # Case: API-01 + _run( + inner, + """ + from sift_client.sift_types.test_report import TestStatus + def test_x(step): + step.current_step.update({"status": TestStatus.FAILED}) + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_report_outcome_false_maps_to_failed(inner): + # Case: API-02 + _run( + inner, + """ + def test_x(step): + step.report_outcome("the_check", False, "did not match") + """, + ) + # Outer step sees a failed substep and rolls up to FAILED. + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_measure_out_of_bounds_maps_to_failed(inner): + # Case: API-03 + _run( + inner, + """ + def test_x(step): + step.measure(name="m", value=10.0, bounds={"min": 0.0, "max": 5.0}) + """, + ) + assert capture.final_status("test_x") == TestStatus.FAILED + + +def test_substep_failure_propagates_to_parent(inner): + # Case: API-04 + _run( + inner, + """ + def test_x(step): + with step.substep(name="inner") as inner_step: + inner_step.measure(name="m", value=10.0, bounds={"min": 0.0, "max": 5.0}) + """, + ) + # `test_measure_out_of_bounds_maps_to_failed` exercises a failed + # measurement on the function step itself; this one verifies the same + # failure on a nested substep propagates up to the parent. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.FAILED + + +def test_skipped_substep_does_not_fail_parent(inner): + # Case: API-05 + _run( + inner, + """ + from sift_client.sift_types.test_report import TestStatus + def test_x(step): + with step.substep(name="optional_check") as cal: + cal.current_step.update( + {"status": TestStatus.SKIPPED}, + log_file=step.report_context.log_file, + ) + """, + ) + # A manually-resolved SKIPPED on a substep must not propagate as a failure + # to the parent. The outer step has no measurements of its own and resolves + # to PASSED. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.PASSED diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index 7c4c1c2f5..07704383d 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -5,14 +5,16 @@ from dataclasses import dataclass from datetime import datetime, timezone from pathlib import Path +from types import SimpleNamespace from typing import TYPE_CHECKING, Any, Generator, Tuple import pytest from sift_client import SiftClient, SiftConnectionConfig from sift_client.errors import SiftWarning -from sift_client.sift_types.test_report import TestStatus +from sift_client.sift_types.test_report import ErrorInfo, TestStatus from sift_client.util.test_results import ReportContext +from sift_client.util.test_results.context_manager import format_truncated_traceback class SiftPytestPluginWarning(SiftWarning): @@ -508,17 +510,151 @@ def _resolve_log_file(pytestconfig: pytest.Config | None) -> str | Path | bool | return Path(raw) +def _error_info_from_longrepr(longrepr: Any) -> ErrorInfo: + """Fall back to the report's longrepr when no Python exception is available.""" + return ErrorInfo(error_code=1, error_message=str(longrepr) if longrepr is not None else "") + + +def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: + """Resolve the function step's status from pytest's per-phase reports. + + Reads ``_sift_phase_setup`` / ``_sift_phase_call`` and the test's xfail marker, + then mutates ``new_step.current_step`` in place and flips + ``new_step._sift_managed_externally`` so ``NewStep.__exit__`` emits the + resolved status without re-classifying. + + When the call phase reports ``passed`` and no override is needed (i.e. the + test's own status or substep failures should drive the result), this leaves + the step alone so the default ``__exit__`` resolution stays in charge. + """ + setup_phase = getattr(item, "_sift_phase_setup", None) + call_phase = getattr(item, "_sift_phase_call", None) + xfail_marker = item.get_closest_marker("xfail") + xfail_runs = xfail_marker.kwargs.get("run", True) if xfail_marker is not None else True + + status: TestStatus | None = None + error_info: ErrorInfo | None = None + keep_managed = False + + if setup_phase is not None and setup_phase.report.outcome == "failed": + status = TestStatus.ERROR + excinfo = setup_phase.call.excinfo + if excinfo is not None: + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + else: + error_info = _error_info_from_longrepr(setup_phase.report.longrepr) + elif setup_phase is not None and setup_phase.report.outcome == "skipped": + status = TestStatus.SKIPPED + elif call_phase is None: + # Setup completed but the call-phase report never fired — the inner + # pytester session was aborted (e.g. by KeyboardInterrupt) before the + # plugin could observe the outcome. Leave the step at IN_PROGRESS so + # the report does not lie about a clean pass. + keep_managed = True + else: + wasxfail = getattr(call_phase.report, "wasxfail", None) + if wasxfail is not None: + if call_phase.report.outcome == "failed": + # Strict xpass: pytest synthesizes a failure when an xfail(strict=True) + # test unexpectedly passes. The xfail mark no longer matches reality. + status = TestStatus.FAILED + elif call_phase.report.outcome == "skipped": + if xfail_marker is not None and xfail_runs is False: + # xfail(run=False): the test body never executed. + status = TestStatus.SKIPPED + else: + # xfail + expected failure: the test fulfilled its xfail expectation. + status = TestStatus.PASSED + else: + # Non-strict xpass: passes that weren't required to fail. + status = TestStatus.PASSED + elif call_phase.report.outcome == "passed": + # Default __exit__ resolves PASSED/FAILED from open_step_results and any + # status the test code may have set. Don't override it here. + return + elif call_phase.report.outcome == "skipped": + status = TestStatus.SKIPPED + elif call_phase.report.outcome == "failed": + excinfo = call_phase.call.excinfo + if excinfo is None: + status = TestStatus.FAILED + elif isinstance(excinfo.value, AssertionError): + status = TestStatus.FAILED + elif isinstance(excinfo.value, pytest.fail.Exception): + status = TestStatus.FAILED + elif isinstance(excinfo.value, KeyboardInterrupt): + # Hold status at IN_PROGRESS; a session-aborting interrupt should + # not look like a clean pass. Keep the managed flag so the default + # exit path does not coerce IN_PROGRESS to PASSED. + keep_managed = True + elif isinstance(excinfo.value, SystemExit): + status = TestStatus.ERROR + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + elif xfail_marker is not None: + # xfail(raises=X) with a non-matching exception: the contract failed. + status = TestStatus.FAILED + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + else: + status = TestStatus.ERROR + error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + + if status is None and not keep_managed: + return + + assert new_step.current_step is not None + if status is not None: + # BaseType is frozen; mutate via __dict__ the same way _apply_client_to_instance does. + new_step.current_step.__dict__["status"] = status + if error_info is not None: + new_step.current_step.__dict__["error_info"] = error_info + new_step._sift_managed_externally = True + + +def _finalize_after_teardown(item: pytest.Item, teardown_report: pytest.TestReport) -> None: + """Upgrade a closed step to FAILED when the teardown phase failed. + + The autouse step fixture has already exited by the time the teardown + makereport hook fires, so call ``step.update`` again to override the status + server-side and propagate the failure to the still-open parent step. + """ + step: NewStep | None = getattr(item, "_sift_step", None) + if step is None: + return + assert step.current_step is not None + if teardown_report.outcome == "failed" and step.current_step.status == TestStatus.PASSED: + step.current_step.update({"status": TestStatus.FAILED}) + step.report_context.mark_step_failed_after_close(step.current_step) + + @pytest.hookimpl(tryfirst=True, hookwrapper=True) def pytest_runtest_makereport(item: pytest.Item, call: pytest.CallInfo[Any]): - """Capture pytest outcomes so assertion failures and skips land on the Sift step.""" + """Capture per-phase reports and finalize step status after teardown. + + Stashes both ``rep_`` (the ``CallInfo``, kept for pytest plugins that + expect that conventional attribute) and ``_sift_phase_`` (a + ``SimpleNamespace(call, report)`` used by ``_resolve_initial_status``). The + collection-time skip path is strictly gated on ``_sift_step`` being unset + so it does not duplicate steps the fixture already created. + """ outcome = yield report = outcome.get_result() - if report.outcome == "skipped": - # Skipped tests bypass the autouse `step` fixture, so we record the step manually here. - if REPORT_CONTEXT: - with REPORT_CONTEXT.new_step(name=item.name) as new_step: - new_step.current_step.update({"status": TestStatus.SKIPPED}) setattr(item, "rep_" + report.when, call) + setattr(item, "_sift_phase_" + report.when, SimpleNamespace(call=call, report=report)) + + # Collection-time skip (``@pytest.mark.skip`` / ``skipif``): the autouse + # ``step`` fixture never runs, so the hook is the only place that can + # record a step. Presence of ``_sift_step`` is the "fixture ran" signal. + if ( + REPORT_CONTEXT + and report.when == "setup" + and report.outcome == "skipped" + and getattr(item, "_sift_step", None) is None + ): + with REPORT_CONTEXT.new_step(name=item.name) as inline_step: + inline_step.current_step.update({"status": TestStatus.SKIPPED}) + + if report.when == "teardown": + _finalize_after_teardown(item, report) def _report_context_impl( @@ -748,13 +884,9 @@ def _step_impl( with report_context.new_step( name=name, description=existing_docstring, assertion_as_fail_not_error=False ) as new_step: + node._sift_step = new_step yield new_step - if hasattr(node, "rep_call") and node.rep_call.excinfo: - new_step.update_step_from_result( - node.rep_call.excinfo, - node.rep_call.excinfo.value, - node.rep_call.excinfo.tb, - ) + _resolve_initial_status(new_step, node) @pytest.fixture(autouse=True) diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index bd2ec917f..0f6097abc 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -43,6 +43,17 @@ logger = logging.getLogger(__name__) +def format_truncated_traceback( + exc: type[BaseException] | None, + exc_value: BaseException | None, + tb: object | None, +) -> ErrorInfo: + """Format an ErrorInfo from a traceback, keeping the first frame and the last 10.""" + stack = traceback.format_exception(exc, exc_value, tb) # type: ignore[arg-type] + stack = [stack[0], *stack[-10:]] if len(stack) > 10 else stack + return ErrorInfo(error_code=1, error_message="".join(stack)) + + def log_replay_instructions(log_file: str | Path | None) -> None: """Surface replay instructions when an import/replay attempt fails. @@ -363,6 +374,17 @@ def record_step_outcome(self, outcome: bool, step: TestStep): self.open_step_results[step.step_path] = False self.any_failures = True + def mark_step_failed_after_close(self, step: TestStep): + """Mark a step's parent as failed after the step has already been popped from the stack. + + Used by the pytest plugin when a teardown-phase report fires after the + fixture's ``__exit__`` has already resolved and exited the step. + """ + self.any_failures = True + path_parts = step.step_path.split(".") + if len(path_parts) > 1: + self.open_step_results[".".join(path_parts[:-1])] = False + def resolve_and_propagate_step_result( self, step: TestStep, @@ -478,13 +500,7 @@ def update_step_from_result( # If we're not showing assertion errors (i.e. pytest), mark step as failed but don't set error info. self.report_context.record_step_outcome(False, self.current_step) else: - stack = traceback.format_exception(exc, exc_value, tb) # type: ignore - stack = [stack[0], *stack[-10:]] if len(stack) > 10 else stack - trace = "".join(stack) - error_info = ErrorInfo( - error_code=1, - error_message=trace, - ) + error_info = format_truncated_traceback(exc, exc_value, tb) # Resolve the status of this step (i.e. fail if children failed) and propagate the result to the parent step. result = self.report_context.resolve_and_propagate_step_result( @@ -509,6 +525,27 @@ def update_step_from_result( return result def __exit__(self, exc, exc_value, tb): + if getattr(self, "_sift_managed_externally", False): + # The pytest fixture already resolved status from phase reports. + # Run the standard propagation so the parent step sees this step's + # pass/fail, emit one update_step with the resolved values, and pop + # from the stack without re-classifying. + assert self.current_step is not None + result = self.report_context.resolve_and_propagate_step_result( + self.current_step, self.current_step.error_info + ) + self.current_step.update( + { + "status": self.current_step.status, + "end_time": datetime.now(timezone.utc), + "error_info": self.current_step.error_info, + }, + ) + self.report_context.exit_step(self.current_step) + if hasattr(self, "force_result"): + result = self.force_result + return result + result = self.update_step_from_result(exc, exc_value, tb) # Now that the step is updated. Let the report context handle removing it from the stack and updating the report context. From 212bf77acb4a471f153dcf6474c959b1952fd44d Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Fri, 22 May 2026 18:55:12 -0700 Subject: [PATCH 2/6] clean up --- .../lib/sift_client/_tests/pytest_plugin/test_pass_fail.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py index c5027fb41..a05d18982 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py @@ -146,6 +146,8 @@ def ping(self): class _FakeSiftClient: + _simulate = False + def __init__(self): self.test_results = _FakeTestResults(self) self.ping = _FakePing() @@ -158,8 +160,8 @@ def sift_client(): _RUN_ARGS = ( - "--sift-test-results-log-file=false", - "--no-sift-test-results-git-metadata", + "--sift-log-file=false", + "--no-sift-git-metadata", ) From 6309d98893e7cd8e1aea4549b98a4d0bb41de12b Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Fri, 22 May 2026 20:22:06 -0700 Subject: [PATCH 3/6] clean up to properly use jsonl that we already have for tests --- .../_tests/pytest_plugin/_fakes.py | 132 ------------ .../pytest_plugin/_step_status_capture.py | 129 +++++++++--- .../pytest_plugin/step_status_states.md | 148 ++++++------- .../_tests/pytest_plugin/test_hierarchy.py | 172 +++++++--------- .../_tests/pytest_plugin/test_pass_fail.py | 194 +++++------------- python/lib/sift_client/pytest_plugin.py | 12 +- .../util/test_results/context_manager.py | 78 +++---- 7 files changed, 343 insertions(+), 522 deletions(-) delete mode 100644 python/lib/sift_client/_tests/pytest_plugin/_fakes.py diff --git a/python/lib/sift_client/_tests/pytest_plugin/_fakes.py b/python/lib/sift_client/_tests/pytest_plugin/_fakes.py deleted file mode 100644 index 460100daa..000000000 --- a/python/lib/sift_client/_tests/pytest_plugin/_fakes.py +++ /dev/null @@ -1,132 +0,0 @@ -"""Test doubles for the pytester-driven pytest-plugin tests. - -The fake ``ReportContext`` is a drop-in for the real one that records every -step creation to a JSON file at session exit. Used by ``test_parametrize.py`` -to assert the step tree produced by an inner pytester pytest run. -""" - -from __future__ import annotations - -import itertools -import json -from typing import TYPE_CHECKING, Any -from unittest.mock import MagicMock - -if TYPE_CHECKING: - from pathlib import Path - - -class FakeStep: - def __init__(self, id_: str, name: str, parent_step_id: str | None, step_path: str) -> None: - self.id_ = id_ - self.name = name - self.parent_step_id = parent_step_id - self.step_path = step_path - self.status: Any = None - self.description: Any = None - self.error_info: Any = None - - def update(self, fields: dict[str, Any]) -> None: - for k, v in fields.items(): - setattr(self, k, v) - - -class FakeReport: - def __init__(self) -> None: - self.id_ = "report-id" - - def update(self, fields: dict[str, Any]) -> None: - pass - - -class FakeReportContext: - def __init__(self, steps_file: Path) -> None: - self.steps_file = steps_file - self.report = FakeReport() - self.client = MagicMock() - self.step_stack: list[FakeStep] = [] - self.step_number_at_depth: dict[int, int] = {} - self.open_step_results: dict[str, bool] = {} - self.any_failures = False - self.log_file: Path | None = None - self.steps: list[dict[str, Any]] = [] - self._ids = itertools.count(1) - - def __enter__(self) -> FakeReportContext: - return self - - def __exit__(self, *_: Any) -> None: - self.steps_file.write_text(json.dumps(self.steps)) - - def new_step( - self, - name: str, - description: str | None = None, - assertion_as_fail_not_error: bool = True, - metadata: dict[str, Any] | None = None, - ) -> Any: - # Reuse the real NewStep machinery — it talks to this fake via the - # methods below. - from sift_client.util.test_results.context_manager import NewStep - - return NewStep( - self, # type: ignore[arg-type] - name=name, - description=description, - assertion_as_fail_not_error=assertion_as_fail_not_error, - metadata=metadata, - ) - - def get_next_step_path(self) -> str: - top = self.step_stack[-1] if self.step_stack else None - path = top.step_path if top else "" - next_n = self.step_number_at_depth.get(len(self.step_stack), 0) + 1 - prefix = f"{path}." if path else "" - return f"{prefix}{next_n}" - - def create_step( - self, - name: str, - description: str | None = None, - metadata: dict[str, Any] | None = None, - ) -> FakeStep: - step_path = self.get_next_step_path() - parent = self.step_stack[-1] if self.step_stack else None - step = FakeStep( - id_=f"step-{next(self._ids)}", - name=name, - parent_step_id=parent.id_ if parent else None, - step_path=step_path, - ) - self.step_number_at_depth[len(self.step_stack)] = ( - self.step_number_at_depth.get(len(self.step_stack), 0) + 1 - ) - self.step_stack.append(step) - self.open_step_results[step.step_path] = True - self.steps.append( - { - "id": step.id_, - "name": name, - "parent_step_id": step.parent_step_id, - "step_path": step_path, - } - ) - return step - - def record_step_outcome(self, outcome: bool, step: FakeStep) -> None: - if not outcome: - self.open_step_results[step.step_path] = False - self.any_failures = True - - def resolve_and_propagate_step_result(self, step: FakeStep, error_info: Any = None) -> bool: - result = self.open_step_results.get(step.step_path, True) - if error_info: - result = False - return result - - def exit_step(self, step: FakeStep) -> None: - self.step_number_at_depth[len(self.step_stack)] = 0 - stack_top = self.step_stack.pop() - self.open_step_results.pop(step.step_path) - if stack_top.id_ != step.id_: - raise ValueError("popped step was not the top of the stack") diff --git a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py index 6838fdcc5..dd8e8c05a 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py +++ b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py @@ -1,23 +1,20 @@ -"""Shared state for the step-status characterization suite. +"""Read step status sequences from a Sift offline-mode log file. -The outer test in ``test_step_status_states.py`` runs inner pytest sessions -via ``pytester``. The inner session installs a fake ``sift_client`` (see -``_INNER_CONFTEST_SRC`` in that file) which records every step status -write into this module's ``CAPTURED_STEPS`` dict so the outer test can -assert on what the plugin produced. - -This lives in its own module (rather than inside the test file) because -the inner ``conftest.py`` runs in a fresh pytester tmp dir and needs an -importable, package-reachable handle to the same dict object. +The contract suite drives each scenario through an inner pytester session +run with ``--sift-offline``, which causes the real plugin + ``ReportContext`` +to write every test-result API call to a JSONL log. This module parses +that log into a per-step status timeline that ``test_pass_fail.py`` asserts +against, with no test-only ``ReportContext`` fake required. """ from __future__ import annotations +import json from dataclasses import dataclass, field -from typing import TYPE_CHECKING +from pathlib import Path -if TYPE_CHECKING: - from sift_client.sift_types.test_report import TestStatus +from sift_client._internal.low_level_wrappers._test_results_log import iter_log_data_lines +from sift_client.sift_types.test_report import TestStatus @dataclass @@ -29,37 +26,111 @@ class CapturedStep: statuses: list[TestStatus] = field(default_factory=list) -CAPTURED_STEPS: dict[str, CapturedStep] = {} +_PROTO_STATUS_NAMES = { + "TEST_STATUS_UNSPECIFIED": TestStatus.UNSPECIFIED, + "TEST_STATUS_DRAFT": TestStatus.DRAFT, + "TEST_STATUS_PASSED": TestStatus.PASSED, + "TEST_STATUS_FAILED": TestStatus.FAILED, + "TEST_STATUS_ABORTED": TestStatus.ABORTED, + "TEST_STATUS_ERROR": TestStatus.ERROR, + "TEST_STATUS_IN_PROGRESS": TestStatus.IN_PROGRESS, + "TEST_STATUS_SKIPPED": TestStatus.SKIPPED, +} + + +def _status(name: str | None) -> TestStatus: + if name is None: + return TestStatus.UNSPECIFIED + return _PROTO_STATUS_NAMES.get(name, TestStatus.UNSPECIFIED) -def reset() -> None: - CAPTURED_STEPS.clear() +def parse_log(log_path: Path) -> dict[str, CapturedStep]: + """Parse the offline log into ``{step_id: CapturedStep}``. + + Walks the JSONL file in order, building a ``CapturedStep`` for each + ``CreateTestStep`` entry and appending the new status from each + ``UpdateTestStep`` entry. + """ + steps: dict[str, CapturedStep] = {} + for request_type, response_id, json_str in iter_log_data_lines(log_path): + payload = json.loads(json_str) + test_step = payload.get("testStep", {}) + if request_type == "CreateTestStep" and response_id: + steps[response_id] = CapturedStep( + step_id=response_id, + name=test_step.get("name", ""), + step_path=test_step.get("stepPath", ""), + parent_step_id=test_step.get("parentStepId") or None, + statuses=[_status(test_step.get("status"))], + ) + elif request_type == "UpdateTestStep": + step_id = test_step.get("testStepId") + new_status = test_step.get("status") + if step_id and step_id in steps and new_status is not None: + steps[step_id].statuses.append(_status(new_status)) + return steps + + +_active_log: Path | None = None +_cached: dict[str, CapturedStep] | None = None + + +def set_log(path: Path) -> None: + """Point subsequent queries at a new log file. Clears the parse cache.""" + global _active_log, _cached + _active_log = path + _cached = None + + +def _steps() -> dict[str, CapturedStep]: + global _cached + if _cached is None: + if _active_log is None or not _active_log.exists(): + _cached = {} + else: + _cached = parse_log(_active_log) + return _cached def steps_by_name(name: str) -> list[CapturedStep]: - return [s for s in CAPTURED_STEPS.values() if s.name == name] + return [s for s in _steps().values() if s.name == name] def test_step(name: str) -> CapturedStep | None: """The step the autouse ``step`` fixture creates for the test function. - There can be a deeper step with the same name when the ``makereport`` - hook also records one (e.g. ``pytest.skip()`` inside the test body, or - an ``xfail`` mark). The autouse step is the shallowest of those, so - pick by step_path depth. + Multiple steps can share a name (e.g. when the makereport hook records an + inline step for a collection-time skip on top of the autouse step). The + autouse step is the shallowest by path depth. """ - matches = [s for s in CAPTURED_STEPS.values() if s.name == name] + matches = steps_by_name(name) if not matches: return None return min(matches, key=lambda s: s.step_path.count(".")) -def child_steps(parent: CapturedStep) -> list[CapturedStep]: - return [s for s in CAPTURED_STEPS.values() if s.parent_step_id == parent.step_id] - - def final_status(name: str) -> TestStatus | None: step = test_step(name) - if step is None or not step.statuses: - return None - return step.statuses[-1] + return step.statuses[-1] if step and step.statuses else None + + +def load_steps(log_path: Path) -> list[dict]: + """Load the offline log as a list of step records keyed by hierarchy fields. + + Each record has ``id``, ``name``, ``parent_step_id``, ``step_path``, the + shape ``test_hierarchy.py`` expects for its ``_by_name`` and + ``_ancestor_names`` walkers. Returns an empty list if the log was never + created (e.g. every item in the inner session was ``sift_exclude``-d, so + the plugin's ``report_context`` fixture never fired). + """ + if not log_path.exists(): + return [] + return [ + { + "id": s.step_id, + "name": s.name, + "parent_step_id": s.parent_step_id, + "step_path": s.step_path, + } + for s in parse_log(log_path).values() + ] diff --git a/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md b/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md index 83b64783d..f98d13fee 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md +++ b/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md @@ -1,28 +1,21 @@ -# Pytest-plugin step-status: observed vs. target - -Companion document to `test_step_status_states.py`. Each row corresponds to -one scenario in that suite. The **target** column is the contract the suite -asserts (sourced from -[`docs/guides/pytest_plugin/pass_fail_behavior.md`](../../../../docs/guides/pytest_plugin/pass_fail_behavior.md)); -the **observed today** column records what the plugin actually produces -right now. Every row should be marked `OK`; a `Gap` indicates the plugin has -regressed against the contract. - -`TestStatus` values referenced below come from -`sift_client.sift_types.test_report.TestStatus`: `PASSED`, `FAILED`, `ERROR`, -`SKIPPED`, `IN_PROGRESS`. The targets below map every scenario onto these -existing statuses. An `ABORTED` status for hard process exits (`SystemExit`, -`KeyboardInterrupt`, signals) is a planned future addition; until it lands -those cases baseline against `ERROR` or `IN_PROGRESS`. The user-facing -contract these targets describe is documented in +# Pytest-plugin step-status: test scenarios + +Reference for the pass/fail scenarios covered by +[`test_pass_fail.py`](test_pass_fail.py). Each row pairs a scenario with the +`TestStatus` the plugin records, and maps to the user-facing contract in [`docs/guides/pytest_plugin/pass_fail_behavior.md`](../../../../docs/guides/pytest_plugin/pass_fail_behavior.md). +`TestStatus` values come from `sift_client.sift_types.test_report.TestStatus`: +`PASSED`, `FAILED`, `ERROR`, `SKIPPED`, `ABORTED`, `IN_PROGRESS`. Hard process +exits the plugin can observe (`SystemExit`, `KeyboardInterrupt` when pytest +delivers a call-phase report) map to `ABORTED`. A session-aborting interrupt +that fires before the plugin sees it leaves the step in `IN_PROGRESS`. + ## Case ID scheme -Each scenario has a stable case ID of the form `PREFIX-NN`, where the -prefix names its section. Tests in `test_step_status_states.py` reference -their case ID in a leading comment so a failing test can be traced back to -this table without rereading the scenario: +Each scenario has a stable case ID of the form `PREFIX-NN`. Tests in +`test_pass_fail.py` reference their case ID in a leading comment so a test can +be traced back to its row here without rereading the scenario: | Prefix | Section | | ------- | ---------------------------------------- | @@ -33,94 +26,79 @@ this table without rereading the scenario: | `COLL` | Collection / fixture-resolution failures | | `API` | Plugin-API exit paths | -IDs are stable: a new scenario in a section takes the next free number for -that prefix; numbers are never reused or shifted when other sections grow. ## Call-phase exit paths -| Case | Scenario | Trigger | Observed today | Target | Status | -| --------- | --------------------------------------- | --------------------------------------------- | --------------------------- | ------------------------------------------ | ------ | -| `CALL-01` | Test passes | function body returns cleanly | `PASSED` | `PASSED` | OK | -| `CALL-02` | Assert failure in call phase | `assert 1 == 2` | `FAILED` | `FAILED` | OK | -| `CALL-03` | Generic exception in call phase | `raise ValueError("boom")` | `ERROR` | `ERROR` | OK | -| `CALL-04` | `pytest.fail("...")` from body | `pytest.fail("intentional failure")` | `FAILED` | `FAILED` | OK | -| `CALL-05` | `SystemExit` from the test body | `sys.exit(1)` | `ERROR` | `ERROR` (baseline; `ABORTED` planned later) | OK | -| `CALL-06` | `KeyboardInterrupt` in body | `raise KeyboardInterrupt` | `IN_PROGRESS` (session aborts before the plugin sees the interrupt) | `ERROR` when the plugin sees the interrupt; a session-aborting interrupt leaves the step in `IN_PROGRESS` | OK | +| Case | Scenario | Trigger | Outcome | +| --------- | ------------------------------- | ------------------------------------ | -------------------------------------------------------------------------------------------------------- | +| `CALL-01` | Test passes | function body returns cleanly | `PASSED` | +| `CALL-02` | Assert failure in call phase | `assert 1 == 2` | `FAILED` | +| `CALL-03` | Generic exception in call phase | `raise ValueError("boom")` | `ERROR` | +| `CALL-04` | `pytest.fail("...")` from body | `pytest.fail("intentional failure")` | `FAILED` | +| `CALL-05` | `SystemExit` from the test body | `sys.exit(1)` | `ABORTED` | +| `CALL-06` | `KeyboardInterrupt` in body | `raise KeyboardInterrupt` | `IN_PROGRESS` — session aborts before the plugin sees the interrupt; `ABORTED` if the plugin does see it | ## Skip paths -| Case | Scenario | Trigger | Observed today | Target | Status | -| --------- | --------------------------------------- | --------------------------------------------- | --------------------------------------------------------------------------- | --------------------------------------------------------------- | ------ | -| `SKIP-01` | Collection-time skip | `@pytest.mark.skip(reason=...)` | `SKIPPED` (only the makereport hook records a step; no autouse step ran) | `SKIPPED` | OK | -| `SKIP-02` | Conditional collection-time skip | `@pytest.mark.skipif(True, reason=...)` | `SKIPPED` (same route as `@pytest.mark.skip`) | `SKIPPED` | OK | -| `SKIP-03` | Runtime skip in body | `pytest.skip("...")` | Outer step `SKIPPED`; no duplicate nested step | Outer step `SKIPPED`; no duplicate nested step | OK | -| `SKIP-04` | Skip raised inside a fixture | `@pytest.fixture` calls `pytest.skip("...")` | Outer step `SKIPPED` (setup-phase skip); no duplicate nested step | Outer step `SKIPPED` (setup-phase skip); no duplicate nested step | OK | +| Case | Scenario | Trigger | Outcome | +| --------- | -------------------------------- | -------------------------------------------- | ------------------------------------------------------------------------ | +| `SKIP-01` | Collection-time skip | `@pytest.mark.skip(reason=...)` | `SKIPPED` — only the makereport hook records a step; no autouse step ran | +| `SKIP-02` | Conditional collection-time skip | `@pytest.mark.skipif(True, reason=...)` | `SKIPPED` — same route as `@pytest.mark.skip` | +| `SKIP-03` | Runtime skip in body | `pytest.skip("...")` | Outer step `SKIPPED`; no duplicate nested step | +| `SKIP-04` | Skip raised inside a fixture | `@pytest.fixture` calls `pytest.skip("...")` | Outer step `SKIPPED` (setup-phase skip); no duplicate nested step | ## xfail / xpass -| Case | Scenario | Trigger | Observed today | Target | Status | -| ---------- | ----------------------------------------- | ------------------------------------------------------ | ----------------------------------------------------------------------------------------------- | ----------------------------------------------------- | ------ | -| `XFAIL-01` | xfail-marked test that fails | `@pytest.mark.xfail` + `assert 1 == 2` | Outer step `PASSED` (test fulfilled the xfail expectation); no duplicate nested step | Outer step `PASSED` (test fulfilled the xfail expectation); no duplicate nested step | OK | -| `XFAIL-02` | Strict xfail that unexpectedly passes | `@pytest.mark.xfail(strict=True)` + `assert True` | Outer step `FAILED` (mark no longer matches reality — either the bug was fixed or the test stopped testing what it claimed) | Outer step `FAILED` (mark no longer matches reality — either the bug was fixed or the test stopped testing what it claimed) | OK | -| `XFAIL-03` | Non-strict xfail that unexpectedly passes | `@pytest.mark.xfail()` + `assert True` | Outer step `PASSED` | Outer step `PASSED` (`strict=False` doesn't insist on the failure) | OK | -| `XFAIL-04` | `xfail(raises=...)` with wrong exception | `@pytest.mark.xfail(raises=ValueError)` + `raise KeyError` | `FAILED` (the `raises=` mismatch is a real test failure) | `FAILED` (the `raises=` mismatch is a real test failure) | OK | -| `XFAIL-05` | `xfail(run=False)` | `@pytest.mark.xfail(run=False)` (body never executed) | `SKIPPED` (the test never ran) | `SKIPPED` (the test never ran) | OK | +| Case | Scenario | Trigger | Outcome | +| ---------- | ----------------------------------------- | ---------------------------------------------------------- | -------------------------------------------------------- | +| `XFAIL-01` | xfail-marked test that fails | `@pytest.mark.xfail` + `assert 1 == 2` | `PASSED` — test fulfilled the xfail expectation | +| `XFAIL-02` | Strict xfail that unexpectedly passes | `@pytest.mark.xfail(strict=True)` + `assert True` | `FAILED` — mark no longer matches reality | +| `XFAIL-03` | Non-strict xfail that unexpectedly passes | `@pytest.mark.xfail()` + `assert True` | `PASSED` — `strict=False` doesn't insist on the failure | +| `XFAIL-04` | `xfail(raises=...)` with wrong exception | `@pytest.mark.xfail(raises=ValueError)` + `raise KeyError` | `FAILED` — `raises=` mismatch is a real test failure | +| `XFAIL-05` | `xfail(run=False)` | `@pytest.mark.xfail(run=False)` (body never executed) | `SKIPPED` — the test never ran | ## Setup / teardown phases -| Case | Scenario | Trigger | Observed today | Target | Status | -| ---------- | ------------------------------------------ | -------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------- | ------ | -| `PHASE-01` | Setup-phase fixture failure (RuntimeError) | `@pytest.fixture` raises before `yield`; test body never runs | Outer step `ERROR`; the plugin reads the setup-phase report and maps `failed` → `ERROR` | `ERROR` (a `phase=setup` annotation is a planned follow-up) | OK | -| `PHASE-02` | Teardown-phase fixture failure | `@pytest.fixture` raises after `yield`; test body passed | Outer step `FAILED`; after teardown the plugin upgrades a passed step when the teardown report shows `failed` | `FAILED` (a `phase=teardown` annotation is a planned follow-up) | OK | -| `PHASE-03` | Call-phase fail **plus** teardown-phase fail | `assert 1 == 2` in body AND `@pytest.fixture` raises after `yield` | Outer step `FAILED` (the call-phase failure dominates); the teardown error is not yet surfaced separately | `FAILED`; surfacing the teardown error alongside is a planned follow-up | OK | +| Case | Scenario | Trigger | Outcome | +| ---------- | -------------------------------------------- | ------------------------------------------------------------------ | -------------------------------------------------------------------------------------------------------------------------------- | +| `PHASE-01` | Setup-phase fixture failure (RuntimeError) | `@pytest.fixture` raises before `yield`; test body never runs | `ERROR` — plugin reads the setup-phase report and maps `failed` → `ERROR` (a `phase=setup` annotation is a planned follow-up) | +| `PHASE-02` | Teardown-phase fixture failure | `@pytest.fixture` raises after `yield`; test body passed | `FAILED` — plugin upgrades a passed step when the teardown report shows `failed` (a `phase=teardown` annotation is a planned follow-up) | +| `PHASE-03` | Call-phase fail **plus** teardown-phase fail | `assert 1 == 2` in body AND `@pytest.fixture` raises after `yield` | `FAILED` — call-phase failure dominates; surfacing the teardown error alongside is a planned follow-up | ## Collection / fixture-resolution failures -| Case | Scenario | Trigger | Observed today | Target | Status | -| --------- | --------------------------------------- | --------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------- | ------ | -| `COLL-01` | Missing fixture | `def test_x(nonexistent_fixture):` | Outer step `ERROR` — the missing fixture surfaces as a setup-phase failure, which the plugin now maps to `ERROR` | `ERROR` (a `phase=setup` annotation is a planned follow-up) | OK | +| Case | Scenario | Trigger | Outcome | +| --------- | --------------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------------ | +| `COLL-01` | Missing fixture | `def test_x(nonexistent_fixture):` | `ERROR` — missing fixture surfaces as a setup-phase failure (a `phase=setup` annotation is a planned follow-up) | ## Plugin-API exit paths (in-test mutations) -| Case | Scenario | Trigger | Observed today | Target | Status | -| -------- | --------------------------------------- | ---------------------------------------------------------------------- | ----------------------------------------------------------- | -------- | ------ | -| `API-01` | Manual status override | `step.current_step.update({"status": TestStatus.FAILED})` | `FAILED` | `FAILED` | OK | -| `API-02` | `report_outcome(result=False)` | `step.report_outcome("the_check", False, "did not match")` | `FAILED` | `FAILED` | OK | -| `API-03` | `measure(...)` out-of-bounds | `step.measure(name="m", value=10.0, bounds={"min": 0.0, "max": 5.0})` | `FAILED` | `FAILED` | OK | -| `API-04` | Failed measurement on a substep | `with step.substep(...) as s: s.measure(... out-of-bounds)` | `FAILED` (propagates from substep to parent) | `FAILED` | OK | -| `API-05` | Manually-skipped substep | `with step.substep(...) as s: s.current_step.update({"status": SKIPPED})` | Parent step `PASSED` (skip does not propagate as a failure) | `PASSED` | OK | +| Case | Scenario | Trigger | Outcome | +| -------- | --------------------------------- | ------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------- | +| `API-01` | Manual status override | `step.current_step.update({"status": TestStatus.FAILED})` | `FAILED` | +| `API-02` | `report_outcome(result=False)` | `step.report_outcome("the_check", False, "did not match")` | `FAILED` | +| `API-03` | `measure(...)` out-of-bounds | `step.measure(name="m", value=10.0, bounds={"min": 0.0, "max": 5.0})` | `FAILED` | +| `API-04` | Failed measurement on a substep | `with step.substep(...) as s: s.measure(... out-of-bounds)` | `FAILED` — propagates from substep to parent | +| `API-05` | Manually-skipped substep | `with step.substep(...) as s: s.current_step.update({"status": SKIPPED})` | Parent step `PASSED` — skip does not propagate as a failure | +| `API-06` | Hard exit inside a nested substep | `with step.substep(...) as s: with s.substep(...): sys.exit(1)` | Every open step on the unwind path records `ABORTED`; a sibling substep that closed before the abort keeps its prior status | -## Out of scope for this characterization run +## Out of scope -- **Timeout** — needs `pytest-timeout` or a manual signal harness. Add as a - follow-up once the audit picks a timeout strategy. +Scenarios deliberately not covered by this suite: + +- **Timeout** — needs `pytest-timeout` or a manual signal harness. - **Signal (SIGKILL / SIGTERM)** — cannot be caught from inside the process; needs a subprocess-level harness. - **`pytest.exit("...")`** — niche; the "aborts subsequent tests" behavior - is hard to characterize cleanly because each `pytester` invocation is its - own session. Document the expectation alongside `SystemExit`. + is hard to characterize cleanly because each `pytester` invocation is + its own session. - **`os._exit()`** — bypasses Python cleanup entirely; can't be tested - in-process because it would kill the outer pytest run. Document as a - guaranteed data-loss case alongside `SystemExit` / `SIGKILL`. + in-process because it would kill the outer pytest run. Guaranteed + data-loss case alongside `SystemExit` / `SIGKILL`. - **Parametrize-level marks** (`pytest.param(..., marks=pytest.mark.xfail / skip)`) — routes through a different selection path but produces the same - `report.outcome`, so behavior should match the function-level marks - already covered above. Add only if the plugin's eventual phase-aware - handler diverges between the two. + `report.outcome`, so behavior matches the function-level marks already + covered above. - **Import error / syntax error / `conftest.py` error** — these fail - collection entirely; no `item` is produced and no plugin hook fires. - Document explicitly that no Sift step is recorded. -- **No-data / indeterminate** — tracked separately as part of the sibling - status-semantics work. - -## How to refresh this table - -Run the suite locally: - -``` -pytest lib/sift_client/_tests/util/test_step_status_states.py -v -``` - -Every row should be `OK`. If a row regresses to `Gap`, the matching test -fails; update the **Observed today** column here to describe the -regression and flip the row's status to **Gap** until the plugin is fixed. + collection entirely; no `item` is produced and no plugin hook fires, so + no Sift step is recorded. diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py index cecad2df8..866471ce7 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py @@ -4,65 +4,43 @@ classes (including nested), parametrize axes — plus the ini opt-out flags, failure-cleanup semantics, and the drain helper. -Each test spins up an inner pytest run via ``pytester`` whose conftest swaps -in a ``FakeReportContext`` (from ``_fakes.py``) that records every step -creation to a JSON file. The outer test reads that file and asserts the -resulting step tree. +Each test spins up an inner pytest run via ``pytester`` configured with +``--sift-offline`` and a known log path. The plugin writes every test-result +API call to that JSONL log, and the outer test parses it via +``_step_status_capture.load_steps`` to reconstruct the step tree. """ from __future__ import annotations -import json -from pathlib import Path as _Path from textwrap import dedent from typing import TYPE_CHECKING import pytest +from sift_client._tests.pytest_plugin import _step_status_capture as capture + if TYPE_CHECKING: from pathlib import Path -_STEPS_FILE_ENV = "SIFT_FAKE_STEPS_FILE" - -# ``_fakes.py`` is excluded from the wheel by ``pyproject.toml``'s -# ``packages.find`` rule that strips ``sift_client._tests``. The inner -# pytester subprocess uses the installed package and cannot import from -# ``sift_client._tests``. Embed the fake source directly into the inner -# conftest so the subprocess gets a fully self-contained module to load. -_FAKES_SOURCE = (_Path(__file__).parent / "_fakes.py").read_text() - -_INNER_CONFTEST = f""" -{_FAKES_SOURCE} - -import os -from pathlib import Path -from unittest.mock import MagicMock - -import pytest - -pytest_plugins = ["sift_client.pytest_plugin"] - -@pytest.fixture(scope="session") -def sift_client(): - return MagicMock() +_INNER_CONFTEST = 'pytest_plugins = ["sift_client.pytest_plugin"]\n' -@pytest.fixture(scope="session", autouse=True) -def report_context(sift_client): - import sift_client.pytest_plugin as plugin_module - steps_file = Path(os.environ[{_STEPS_FILE_ENV!r}]) - with FakeReportContext(steps_file) as ctx: - plugin_module.REPORT_CONTEXT = ctx - yield ctx -""" +def _base_ini_lines(log_path: Path) -> list[str]: + """Default ini settings every inner pytester run needs.""" + return [ + "[pytest]", + "sift_offline = true", + f"sift_log_file = {log_path}", + "sift_git_metadata = false", + ] @pytest.fixture -def steps_file(pytester: pytest.Pytester, monkeypatch: pytest.MonkeyPatch) -> Path: - path = pytester.path / "captured_steps.json" +def log_file(pytester: pytest.Pytester) -> Path: + path = pytester.path / "sift.log" pytester.makeconftest(_INNER_CONFTEST) - monkeypatch.setenv(_STEPS_FILE_ENV, str(path)) + pytester.makefile(".ini", pytest="\n".join(_base_ini_lines(path)) + "\n") return path @@ -86,7 +64,7 @@ def _ancestor_names(steps: list[dict], leaf: dict) -> list[str]: def test_class_methods_cluster_under_class_step( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_klass=dedent( @@ -102,7 +80,7 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert len(by_name["TestFoo"]) == 1 class_id = by_name["TestFoo"][0]["id"] @@ -110,7 +88,7 @@ def test_b(self): assert by_name["test_b"][0]["parent_step_id"] == class_id -def test_nested_classes_produce_nested_steps(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_nested_classes_produce_nested_steps(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_nested=dedent( """ @@ -123,7 +101,7 @@ def test_a(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert len(by_name["TestOuter"]) == 1 assert len(by_name["TestInner"]) == 1 @@ -136,7 +114,7 @@ def test_a(self): ] -def test_class_parametrize_nests_under_class(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_class_parametrize_nests_under_class(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_cp=dedent( """ @@ -151,7 +129,7 @@ def test_a(self, v): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) class_id = by_name["TestFoo"][0]["id"] test_a_id = by_name["test_a"][0]["id"] @@ -160,7 +138,7 @@ def test_a(self, v): assert by_name["v=2"][0]["parent_step_id"] == test_a_id -def test_two_sibling_classes_in_module(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_two_sibling_classes_in_module(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_sib=dedent( """ @@ -176,7 +154,7 @@ def test_y(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) mod_id = by_name["test_sib.py"][0]["id"] assert by_name["TestA"][0]["parent_step_id"] == mod_id @@ -186,7 +164,7 @@ def test_y(self): assert len(by_name["TestB"]) == 1 -def test_mixed_class_and_free_function(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_mixed_class_and_free_function(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_mix=dedent( """ @@ -201,7 +179,7 @@ def test_free(): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) mod_id = by_name["test_mix.py"][0]["id"] # Class method parents to TestA; free function parents directly to module. @@ -211,7 +189,7 @@ def test_free(): def test_class_with_all_excluded_methods_no_class_step( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_excl=dedent( @@ -231,14 +209,14 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "TestFoo" not in by_name assert "test_a" not in by_name assert "test_b" not in by_name -def test_sift_exclude_on_class_propagates(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_sift_exclude_on_class_propagates(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_clsexcl=dedent( """ @@ -256,14 +234,14 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "TestFoo" not in by_name assert "test_a" not in by_name def test_class_docstring_becomes_step_description( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_doc=dedent( @@ -278,7 +256,7 @@ def test_a(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # The fake records step creation but not all fields — check the class # step was recorded, then read the description via the FakeStep's @@ -289,7 +267,7 @@ def test_a(self): def test_transition_between_class_chains_drains_parametrize( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_trans=dedent( @@ -310,7 +288,7 @@ def test_y(self, w): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Each class opens exactly once; parametrize parents under the right class. assert len(by_name["TestA"]) == 1 @@ -396,7 +374,7 @@ def __exit__(self, *_: object) -> None: def test_failing_test_in_class_does_not_orphan_class_step( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """A failing class method must not block the class step from cleaning up. @@ -422,7 +400,7 @@ def test_c(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2, failed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert len(by_name["TestFoo"]) == 1 assert len(by_name["TestBar"]) == 1 @@ -439,7 +417,7 @@ def test_c(self): def test_failing_parametrized_method_in_class_closes_full_chain( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """A failing parametrized class method must not orphan its parametrize parents.""" pytester.makepyfile( @@ -460,7 +438,7 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2, failed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) foo_id = by_name["TestFoo"][0]["id"] test_a_id = by_name["test_a"][0]["id"] @@ -476,18 +454,19 @@ def test_b(self): # --------------------------------------------------------------------------- -def _write_ini(pytester: pytest.Pytester, **overrides: object) -> None: - """Write a pytest.ini with the given sift_* overrides set under [pytest].""" - lines = ["[pytest]"] +def _write_ini(pytester: pytest.Pytester, log_file: Path, **overrides: object) -> None: + """Write a pytest.ini with the given sift_* overrides, preserving the + offline/log/git-metadata defaults the ``log_file`` fixture installs.""" + lines = _base_ini_lines(log_file) for key, value in overrides.items(): lines.append(f"{key} = {value}") pytester.makefile(".ini", pytest="\n".join(lines) + "\n") def test_sift_class_step_false_skips_class_steps( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: - _write_ini(pytester, sift_class_step="false") + _write_ini(pytester, log_file, sift_class_step="false") pytester.makepyfile( test_noclass=dedent( """ @@ -502,7 +481,7 @@ def test_b(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "TestFoo" not in by_name mod_id = by_name["test_noclass.py"][0]["id"] @@ -511,9 +490,9 @@ def test_b(self): def test_sift_module_step_false_skips_module_step( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: - _write_ini(pytester, sift_module_step="false") + _write_ini(pytester, log_file, sift_module_step="false") pytester.makepyfile( test_nomod=dedent( """ @@ -525,7 +504,7 @@ def test_a(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "test_nomod.py" not in by_name # TestFoo attaches to the report root (no parent recorded by the fake). @@ -534,9 +513,9 @@ def test_a(self): def test_sift_parametrize_nesting_false_keeps_flat_leaves( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: - _write_ini(pytester, sift_parametrize_nesting="false") + _write_ini(pytester, log_file, sift_parametrize_nesting="false") pytester.makepyfile( test_flat=dedent( """ @@ -550,7 +529,7 @@ def test_a(v): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # No parametrize parent step. assert "test_a" not in by_name @@ -564,7 +543,7 @@ def test_a(v): def test_sift_module_step_false_still_drains_across_modules( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """sift_module_step=false must not merge same-named classes across modules. @@ -572,7 +551,7 @@ def test_sift_module_step_false_still_drains_across_modules( (even when it's not rendered as a step), so two modules each declaring ``class TestFoo`` produce two distinct ``TestFoo`` frames in the diff. """ - _write_ini(pytester, sift_module_step="false") + _write_ini(pytester, log_file, sift_module_step="false") pytester.makepyfile( test_a=dedent( """ @@ -591,7 +570,7 @@ def test_y(self): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Two distinct TestFoo class steps — one per module — not a shared frame. assert len(by_name["TestFoo"]) == 2 @@ -605,7 +584,7 @@ def test_y(self): def test_package_step_default_opens_for_init_dirs( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """Default: a directory with ``__init__.py`` produces a parent package step.""" pytester.mkpydir("pkg_a") @@ -619,7 +598,7 @@ def test_one(): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "pkg_a" in by_name pkg_id = by_name["pkg_a"][0]["id"] @@ -628,7 +607,7 @@ def test_one(): def test_same_named_packages_in_different_dirs_do_not_merge( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """Two packages with the same display name but different paths must stay distinct. @@ -663,7 +642,7 @@ def test_two(): # name on disk don't collide during sys.path-based import. result = pytester.runpytest_subprocess("-v", "--import-mode=importlib") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Two distinct ``utils`` package steps — one per project. assert len(by_name["utils"]) == 2 @@ -677,10 +656,10 @@ def test_two(): def test_sift_package_step_false_skips_package_steps( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: """With ``sift_package_step=false`` the directory step is suppressed.""" - _write_ini(pytester, sift_package_step="false") + _write_ini(pytester, log_file, sift_package_step="false") pytester.mkpydir("pkg_a") (pytester.path / "pkg_a" / "test_x.py").write_text( dedent( @@ -692,7 +671,7 @@ def test_one(): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert "pkg_a" not in by_name # The module step still opens and is now the top-level frame. @@ -700,10 +679,11 @@ def test_one(): def test_all_three_flags_false_matches_legacy_behavior( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: _write_ini( pytester, + log_file, sift_module_step="false", sift_class_step="false", sift_parametrize_nesting="false", @@ -722,7 +702,7 @@ def test_a(self, v): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # No module, class, or parametrize parents — just bracket-mangled leaves. assert "test_legacy.py" not in by_name @@ -740,7 +720,7 @@ def test_a(self, v): def test_single_parametrize_clusters_under_originalname( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_rail=dedent( @@ -755,7 +735,7 @@ def test_rail(v): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Module step + one shared `test_rail` parent + two leaves. assert len(by_name["test_rail.py"]) == 1 @@ -768,7 +748,7 @@ def test_rail(v): def test_stacked_parametrize_nests_outer_to_inner( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_iso=dedent( @@ -784,7 +764,7 @@ def test_iso(voltage, component): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=4) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # One `test_iso` parent, two `voltage='…'` parents, four `component='…'` leaves. assert len(by_name["test_iso"]) == 1 @@ -806,7 +786,7 @@ def test_iso(voltage, component): assert leaf["parent_step_id"] in voltage_ids -def test_fixture_parametrization_participates(pytester: pytest.Pytester, steps_file: Path) -> None: +def test_fixture_parametrization_participates(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_widget=dedent( """ @@ -823,7 +803,7 @@ def test_widget(widget): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=2) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) assert len(by_name["test_widget"]) == 1 parent_id = by_name["test_widget"][0]["id"] @@ -832,7 +812,7 @@ def test_widget(widget): def test_module_boundary_isolates_parametrize_stack( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_a=dedent( @@ -856,7 +836,7 @@ def test_two(w): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=4) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) by_name = _by_name(steps) # Each module step contains its own `test_one`/`test_two` parametrize subtree. mod_a = by_name["test_a.py"][0] @@ -866,7 +846,7 @@ def test_two(w): def test_leaf_parent_chain_terminates_at_report( - pytester: pytest.Pytester, steps_file: Path + pytester: pytest.Pytester, log_file: Path ) -> None: pytester.makepyfile( test_chain=dedent( @@ -882,7 +862,7 @@ def test_chain(a, b): ) result = pytester.runpytest_subprocess("-v") result.assert_outcomes(passed=1) - steps = json.loads(steps_file.read_text()) + steps = capture.load_steps(log_file) leaf = next(s for s in steps if s["name"].startswith("b=")) chain = _ancestor_names(steps, leaf) # leaf b=… → a=… → test_chain → test_chain.py (module step) → root diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py index a05d18982..a950cdb7d 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py @@ -27,155 +27,41 @@ _INNER_CONFTEST_SRC = ''' -"""Auto-generated conftest for the step-status characterization suite. - -Installs the Sift pytest plugin and a fake ``sift_client`` that records -every step status write into the outer test's CAPTURED_STEPS dict. +"""Auto-generated conftest. Loading the Sift plugin is the only thing the +inner session needs. ``--sift-offline`` on the CLI causes the plugin's +default ``sift_client`` fixture to construct a placeholder client and the +real ``ReportContext`` writes every API call to the JSONL log without +contacting Sift. """ -from __future__ import annotations - -import uuid - -import pytest - -# Bring the Sift fixtures + the makereport hook into this inner session. pytest_plugins = ["sift_client.pytest_plugin"] - -from sift_client._tests.pytest_plugin._step_status_capture import CAPTURED_STEPS, CapturedStep -from sift_client.sift_types.test_report import ( - TestMeasurement, - TestReport, - TestStep, -) - - - -class _FakeTestResults: - def __init__(self, client): - self._client = client - - def create(self, test_report, log_file=None): - report = TestReport( - id_=str(uuid.uuid4()), - status=test_report.status, - name=test_report.name, - test_system_name=test_report.test_system_name, - test_case=test_report.test_case, - start_time=test_report.start_time, - end_time=test_report.end_time, - metadata=test_report.metadata or {}, - is_archived=False, - ) - report._apply_client_to_instance(self._client) - return report - - def update(self, test_report, update, log_file=None): - return test_report - - def create_step(self, test_step, log_file=None): - step_id = str(uuid.uuid4()) - CAPTURED_STEPS[step_id] = CapturedStep( - step_id=step_id, - name=test_step.name, - step_path=test_step.step_path, - parent_step_id=test_step.parent_step_id, - statuses=[test_step.status], - ) - step = TestStep( - id_=step_id, - test_report_id=test_step.test_report_id, - parent_step_id=test_step.parent_step_id, - name=test_step.name, - description=test_step.description, - step_type=test_step.step_type, - step_path=test_step.step_path, - status=test_step.status, - start_time=test_step.start_time, - end_time=test_step.end_time, - error_info=test_step.error_info, - ) - step._apply_client_to_instance(self._client) - return step - - def update_step(self, test_step, update, log_file=None): - new_status = ( - update.get("status") if isinstance(update, dict) else update.status - ) - if test_step.id_ in CAPTURED_STEPS and new_status is not None: - CAPTURED_STEPS[test_step.id_].statuses.append(new_status) - merged_status = new_status if new_status is not None else test_step.status - updated = TestStep( - id_=test_step.id_, - test_report_id=test_step.test_report_id, - parent_step_id=test_step.parent_step_id, - name=test_step.name, - description=test_step.description, - step_type=test_step.step_type, - step_path=test_step.step_path, - status=merged_status, - start_time=test_step.start_time, - end_time=test_step.end_time, - error_info=test_step.error_info, - ) - updated._apply_client_to_instance(self._client) - return updated - - def create_measurement(self, test_measurement, update_step=False, log_file=None): - measurement = TestMeasurement( - id_=str(uuid.uuid4()), - measurement_type=test_measurement.measurement_type, - name=test_measurement.name, - test_step_id=test_measurement.test_step_id, - numeric_value=test_measurement.numeric_value, - string_value=test_measurement.string_value, - boolean_value=test_measurement.boolean_value, - unit=test_measurement.unit, - numeric_bounds=test_measurement.numeric_bounds, - string_expected_value=test_measurement.string_expected_value, - passed=test_measurement.passed, - timestamp=test_measurement.timestamp, - ) - measurement._apply_client_to_instance(self._client) - return measurement - - -class _FakePing: - def ping(self): - return None - - -class _FakeSiftClient: - _simulate = False - - def __init__(self): - self.test_results = _FakeTestResults(self) - self.ping = _FakePing() - - -@pytest.fixture(scope="session") -def sift_client(): - return _FakeSiftClient() ''' -_RUN_ARGS = ( - "--sift-log-file=false", - "--no-sift-git-metadata", -) - - @pytest.fixture def inner(pytester): - """Reset the capture state and install the inner conftest. Returns ``pytester``.""" - capture.reset() + """Install the inner conftest. Returns ``pytester``.""" pytester.makeconftest(_INNER_CONFTEST_SRC) return pytester +# Prepended to every inner test file. Pytest skips marker-based ``skip`` items +# before any autouse fixture runs, which would leave ``REPORT_CONTEXT`` unset +# and the plugin's inline-skip recording inert. A single passing item up-front +# forces ``report_context`` to initialize so the makereport hook can record +# the skip into the same session's JSONL. +_WARMUP = "def test_sift_warmup(): pass\n\n" + + def _run(pytester, body: str) -> None: - pytester.makepyfile(textwrap.dedent(body)) - pytester.runpytest_inprocess(*_RUN_ARGS) + pytester.makepyfile(_WARMUP + textwrap.dedent(body)) + log_path = pytester.path / "sift.log" + capture.set_log(log_path) + pytester.runpytest_inprocess( + "--sift-offline", + f"--sift-log-file={log_path}", + "--no-sift-git-metadata", + ) # --------------------------------------------------------------------------- @@ -219,7 +105,7 @@ def test_x(): assert capture.final_status("test_x") == TestStatus.ERROR -def test_system_exit_maps_to_error(inner): +def test_system_exit_maps_to_aborted(inner): # Case: CALL-05 _run( inner, @@ -229,7 +115,7 @@ def test_x(): sys.exit(1) """, ) - assert capture.final_status("test_x") == TestStatus.ERROR + assert capture.final_status("test_x") == TestStatus.ABORTED def test_pytest_fail_maps_to_failed(inner): @@ -621,3 +507,35 @@ def test_x(step): outer = capture.test_step("test_x") assert outer is not None assert outer.statuses[-1] == TestStatus.PASSED + + +def test_abort_inside_substep_marks_every_open_step_aborted(inner): + # Case: API-06 + _run( + inner, + """ + import sys + def test_x(step): + with step.substep(name="completed_sub"): + pass + with step.substep(name="outer_sub") as outer_sub: + with outer_sub.substep(name="inner_sub"): + sys.exit(1) + """, + ) + # SystemExit unwinds the substep stack on the way out. Every step that was + # open when the abort fired (inner substep, outer substep, test step) + # must record ABORTED. The sibling substep that closed cleanly before the + # abort must retain its PASSED status. + outer = capture.test_step("test_x") + assert outer is not None + assert outer.statuses[-1] == TestStatus.ABORTED + outer_sub = next(iter(capture.steps_by_name("outer_sub")), None) + inner_sub = next(iter(capture.steps_by_name("inner_sub")), None) + completed_sub = next(iter(capture.steps_by_name("completed_sub")), None) + assert outer_sub is not None + assert inner_sub is not None + assert completed_sub is not None + assert outer_sub.statuses[-1] == TestStatus.ABORTED + assert inner_sub.statuses[-1] == TestStatus.ABORTED + assert completed_sub.statuses[-1] == TestStatus.PASSED diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index 07704383d..68304cbf7 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -582,13 +582,11 @@ def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: status = TestStatus.FAILED elif isinstance(excinfo.value, pytest.fail.Exception): status = TestStatus.FAILED - elif isinstance(excinfo.value, KeyboardInterrupt): - # Hold status at IN_PROGRESS; a session-aborting interrupt should - # not look like a clean pass. Keep the managed flag so the default - # exit path does not coerce IN_PROGRESS to PASSED. - keep_managed = True - elif isinstance(excinfo.value, SystemExit): - status = TestStatus.ERROR + elif isinstance(excinfo.value, (KeyboardInterrupt, SystemExit)): + # Hard exits the plugin can observe: pytest converted the + # raise into a call-phase report. The session-aborting variant + # (call_phase is None) lands earlier and stays IN_PROGRESS. + status = TestStatus.ABORTED error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) elif xfail_marker is not None: # xfail(raises=X) with a non-matching exception: the contract failed. diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 0f6097abc..5fd090bfc 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -385,30 +385,22 @@ def mark_step_failed_after_close(self, step: TestStep): if len(path_parts) > 1: self.open_step_results[".".join(path_parts[:-1])] = False - def resolve_and_propagate_step_result( - self, - step: TestStep, - error_info: ErrorInfo | None = None, - ) -> bool: - """Resolve the result of a step and propagate the result to the parent step if it failed.""" - result = self.open_step_results.get(step.step_path, True) - if error_info: - result = False - if step.status != TestStatus.IN_PROGRESS: - # The step was manually completed so use that result. - # Skipped steps are considered passed. - result = step.status in (TestStatus.PASSED, TestStatus.SKIPPED) - - # Update the parent step results if this step failed (true by default so no need to do anything if we didn't fail). - if not result: + def propagate_step_result(self, step: TestStep, status: TestStatus) -> bool: + """Propagate this step's final status to the parent step. + + Status is the governor: anything outside ``{PASSED, SKIPPED}`` counts + as a failure for the parent. ``error_info`` is intentionally not + consulted here; it is free-form diagnostic data that may sit on a + step regardless of status. + """ + succeeded = status in (TestStatus.PASSED, TestStatus.SKIPPED) + if not succeeded: self.any_failures = True self.open_step_results[step.step_path] = False path_parts = step.step_path.split(".") if len(path_parts) > 1: - parent_step_path = ".".join(path_parts[:-1]) - self.open_step_results[parent_step_path] = False - - return result + self.open_step_results[".".join(path_parts[:-1])] = False + return succeeded def exit_step(self, step: TestStep): """Exit a step and update the report context.""" @@ -494,26 +486,42 @@ def update_step_from_result( returns: The false if step failed or errored, true otherwise. """ error_info = None + aborted = False assert self.current_step is not None if exc: if isinstance(exc_value, AssertionError) and not self.assertion_as_fail_not_error: # If we're not showing assertion errors (i.e. pytest), mark step as failed but don't set error info. self.report_context.record_step_outcome(False, self.current_step) + elif isinstance(exc_value, (KeyboardInterrupt, SystemExit)): + # Hard exit propagating through the substep stack: record as + # ABORTED so every in-progress step on the way out reflects + # the abort rather than coercing to ERROR. + aborted = True + error_info = format_truncated_traceback(exc, exc_value, tb) else: error_info = format_truncated_traceback(exc, exc_value, tb) - # Resolve the status of this step (i.e. fail if children failed) and propagate the result to the parent step. - result = self.report_context.resolve_and_propagate_step_result( - self.current_step, error_info - ) - - # Mark the step as completed + # Status is the governor: anything other than IN_PROGRESS was set + # deliberately (manual override, plugin pre-resolution, etc.) and must + # not be silently overwritten by side-channel signals. When the step is + # still IN_PROGRESS, fill it in: hard-exit aborts first, then captured + # exceptions, then the substep-driven pass/fail read from + # open_step_results. status = self.current_step.status if status == TestStatus.IN_PROGRESS: - # Update the status only if the step was in progress i.e. not updated elsewhere. - status = TestStatus.PASSED if result else TestStatus.FAILED - if error_info: - status = TestStatus.ERROR + if aborted: + status = TestStatus.ABORTED + elif error_info: + status = TestStatus.ERROR + else: + children_passed = self.report_context.open_step_results.get( + self.current_step.step_path, True + ) + status = TestStatus.PASSED if children_passed else TestStatus.FAILED + + # Propagate based on the resolved status; error_info rides along as + # pure diagnostics and does not affect propagation. + result = self.report_context.propagate_step_result(self.current_step, status) self.current_step.update( { "status": status, @@ -527,12 +535,12 @@ def update_step_from_result( def __exit__(self, exc, exc_value, tb): if getattr(self, "_sift_managed_externally", False): # The pytest fixture already resolved status from phase reports. - # Run the standard propagation so the parent step sees this step's - # pass/fail, emit one update_step with the resolved values, and pop - # from the stack without re-classifying. + # Propagate based on that resolved status, emit one update_step + # with the resolved values, and pop from the stack without + # re-classifying. assert self.current_step is not None - result = self.report_context.resolve_and_propagate_step_result( - self.current_step, self.current_step.error_info + result = self.report_context.propagate_step_result( + self.current_step, self.current_step.status ) self.current_step.update( { From 798d5ab7d8720bb619b8dfe62067d37a849f0910 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Fri, 22 May 2026 20:52:48 -0700 Subject: [PATCH 4/6] add pass/fail behavior guide --- python/docs/examples/pytest_plugin.md | 4 + .../pytest_plugin/pass_fail_behavior.md | 187 ++++++++++++++++++ python/mkdocs.yml | 3 + 3 files changed, 194 insertions(+) create mode 100644 python/docs/guides/pytest_plugin/pass_fail_behavior.md diff --git a/python/docs/examples/pytest_plugin.md b/python/docs/examples/pytest_plugin.md index c464e564e..5a40d450d 100644 --- a/python/docs/examples/pytest_plugin.md +++ b/python/docs/examples/pytest_plugin.md @@ -306,6 +306,10 @@ outcomes into `TestStatus`: | Non-`AssertionError` exception escapes the test (e.g. `ValueError`, `TimeoutError`) | `ERROR`, with the formatted traceback (last 10 frames plus the first frame) on `step.error_info.error_message` | | Manual `step.current_step.update({"status": ...})` | Whatever you set; the step exit handler honors a manually-resolved status | +For the full contract, including skips, xfail/xpass, hard exits (`SystemExit`, +`KeyboardInterrupt`), setup/teardown phase failures, and propagation rules, +see the [Pass/Fail Behavior guide](../guides/pytest_plugin/pass_fail_behavior.md). + A failure or error at any depth propagates upward: the parent substep, the function step, the class/module/package steps above it, and the session report all get marked failed. diff --git a/python/docs/guides/pytest_plugin/pass_fail_behavior.md b/python/docs/guides/pytest_plugin/pass_fail_behavior.md new file mode 100644 index 000000000..0c9636fe6 --- /dev/null +++ b/python/docs/guides/pytest_plugin/pass_fail_behavior.md @@ -0,0 +1,187 @@ +# Pass/Fail Behavior + +The pytest plugin maps every pytest outcome to a `TestStatus` on the +corresponding Sift step. Use this page to look up what a given test will +produce, and how that result rolls up to the parent steps and the report. + +## `TestStatus` values + +The statuses below come from `sift_client.sift_types.test_report.TestStatus`. + +| Status | Meaning | +| ------------- | ---------------------------------------------------------------------------------------------------- | +| `PASSED` | The step completed and every check it owns succeeded. | +| `FAILED` | An assertion, a `pytest.fail(...)`, a failed `report_outcome`, or a failing measurement marked it. | +| `ERROR` | An unexpected exception escaped the test body or a fixture (setup or teardown). | +| `ABORTED` | A hard exit (`SystemExit`, observed `KeyboardInterrupt`) interrupted the test. | +| `SKIPPED` | The test was skipped at collection time, at runtime, or from a fixture. | +| `IN_PROGRESS` | The plugin never observed a final outcome (e.g. a session-aborting interrupt killed pytest first). | + +## Normal test outcomes + +| Scenario | Trigger | Outcome | +| ----------------------------------------- | ------------------------------------ | -------- | +| Test passes | function body returns cleanly | `PASSED` | +| Assertion failure | `assert 1 == 2` | `FAILED` | +| `pytest.fail("...")` from the body | `pytest.fail("intentional failure")` | `FAILED` | +| Uncaught non-assertion exception | `raise ValueError("boom")` | `ERROR` | + +A non-assertion exception gets its formatted traceback recorded on +`step.error_info.error_message` (first frame plus the last ten frames). +Assertions and `pytest.fail` skip the traceback; pytest already prints one +in the runner output. + +## Hard exits + +Hard exits the plugin can observe map to `ABORTED`. If pytest tears the +session down before the plugin sees the exit, the step stays at +`IN_PROGRESS` instead of resolving. + +| Scenario | Trigger | Outcome | +| ---------------------------------------------- | ------------------------- | -------------------------------------------------------------------- | +| `SystemExit` from the test body | `sys.exit(1)` | `ABORTED` | +| `KeyboardInterrupt` the plugin observes | `raise KeyboardInterrupt` | `ABORTED` | +| Session-aborting `KeyboardInterrupt` | Ctrl-C terminates pytest | `IN_PROGRESS` (session ends before the plugin's hooks fire) | + +### Abort propagation through nested substeps + +When a hard exit fires inside nested substeps, the exception unwinds the +substep stack. Every step that was open when the abort fired records +`ABORTED`. A sibling substep that closed cleanly before the abort keeps +its prior status. + +```python title="test_abort.py" +import sys + + +def test_x(step): + with step.substep(name="completed_sub"): + pass # closes as PASSED before the abort + with step.substep(name="outer_sub") as outer_sub: + with outer_sub.substep(name="inner_sub"): + sys.exit(1) # ABORTED unwinds through inner_sub, outer_sub, and the test step +``` + +The Sift report shows `completed_sub` as `PASSED` and the three steps +still open at the abort (`inner_sub`, `outer_sub`, and the test step +itself) as `ABORTED`. + +## Skips + +| Scenario | Trigger | Outcome | +| ------------------------------------- | --------------------------------------------- | --------- | +| Collection-time skip | `@pytest.mark.skip(reason=...)` | `SKIPPED` | +| Conditional collection-time skip | `@pytest.mark.skipif(True, reason=...)` | `SKIPPED` | +| Runtime skip from the test body | `pytest.skip("...")` | `SKIPPED` | +| Skip raised inside a fixture | `@pytest.fixture` calls `pytest.skip("...")` | `SKIPPED` | + +`SKIPPED` does not propagate as a failure. A skipped substep or test does +not block its parent from resolving to `PASSED`. + +!!! info "Collection-time skip path" + `@pytest.mark.skip` and `@pytest.mark.skipif` short-circuit before any + autouse fixture fires, so the plugin records the step from inside its + `pytest_runtest_makereport` hook rather than from the `step` fixture. + Runtime skips (`pytest.skip(...)`) and skips from inside a fixture go + through the normal fixture path, with the autouse `step` resolving to + `SKIPPED`. + +## Expected failures (xfail / xpass) + +xfail marks declare that a test is expected to fail. The plugin follows +the same semantics pytest does. + +| Scenario | Trigger | Outcome | +| ----------------------------------------- | ---------------------------------------------------------- | ------------------------------------------------------------- | +| xfail-marked test that fails | `@pytest.mark.xfail` + `assert 1 == 2` | `PASSED` (the test fulfilled the xfail expectation) | +| Strict xfail that unexpectedly passes | `@pytest.mark.xfail(strict=True)` + `assert True` | `FAILED` (the mark no longer matches reality) | +| Non-strict xfail that unexpectedly passes | `@pytest.mark.xfail()` + `assert True` | `PASSED` (`strict=False` does not insist on the failure) | +| `xfail(raises=...)` with wrong exception | `@pytest.mark.xfail(raises=ValueError)` + `raise KeyError` | `FAILED` (the `raises=` mismatch is a real test failure) | +| `xfail(run=False)` | `@pytest.mark.xfail(run=False)` | `SKIPPED` (the body never ran) | + +## Setup and teardown phases + +Fixture failures land on the test step the fixture was resolving for. +Setup-phase and teardown-phase failures resolve differently: + +| Scenario | Trigger | Outcome | +| -------------------------------------------- | ---------------------------------------------------------------- | ------------------------------------------------------------- | +| Setup-phase fixture failure | `@pytest.fixture` raises before `yield`; body never runs | `ERROR` | +| Teardown-phase fixture failure | `@pytest.fixture` raises after `yield`; body passed | `FAILED` (the plugin upgrades the closed step after teardown) | +| Call-phase failure plus teardown failure | `assert 1 == 2` in the body AND a `@pytest.fixture` raises after `yield` | `FAILED` (the call-phase failure dominates) | + +## Collection and fixture resolution + +| Scenario | Trigger | Outcome | +| --------------- | ---------------------------------- | ------- | +| Missing fixture | `def test_x(nonexistent_fixture):` | `ERROR` (surfaces as a setup-phase failure) | + +## Influencing outcomes from test code + +A test can also set the step's outcome directly via the helpers below. +Substeps your test opens follow the same propagation rules as the ones +the plugin opens for you. + +### Manual status override + +`step.current_step.update({...})` sets the status directly. The step's +exit handler does not overwrite it. + +```python +from sift_client.sift_types.test_report import TestStatus + + +def test_manual(step): + step.current_step.update({"status": TestStatus.FAILED}) +``` + +### `report_outcome` for externally computed checks + +`report_outcome(name, result, reason)` records a named check whose +pass/fail was computed elsewhere (a subprocess, a remote system, your own +comparison logic). A failing outcome marks the step `FAILED`. + +```python +def test_external_check(step): + result, reason = run_external_validator() + step.report_outcome("ext-validator", result, reason) +``` + +### Measurements with bounds + +`step.measure(name=, value=, bounds=)` records a measurement and resolves +the step to `FAILED` if the value is out of bounds. The call returns the +pass/fail boolean and does not raise, so multiple measurements can run +without short-circuiting. + +```python +def test_battery(step): + step.measure(name="voltage", value=12.1, bounds={"min": 11.5, "max": 13.0}, unit="V") + step.measure(name="current", value=0.42, bounds={"max": 1.0}, unit="A") +``` + +### Substep failures + +A failed substep propagates failure to its parent step. A manually-set +`SKIPPED` on a substep does not. + +```python +def test_with_substep(step): + with step.substep(name="check") as inner: + inner.measure(name="value", value=99.0, bounds={"min": 0.0, "max": 5.0}) + # The outer step resolves to FAILED because the substep failed. +``` + +## Propagation rules + +Steps the plugin opens (function, class, module, package) and substeps +your test opens follow the same rules: + +- A step whose status is anything other than `PASSED` or `SKIPPED` + propagates failure to its parent and to the report. +- `SKIPPED` does not propagate as a failure. +- A status set explicitly via `current_step.update` is kept; the plugin + does not overwrite it. +- `error_info` is free-form diagnostic data. The step's status is the + authoritative signal; `error_info` rides along for the dashboard but + does not change how propagation behaves. diff --git a/python/mkdocs.yml b/python/mkdocs.yml index 5108b7e4a..af174aa4f 100644 --- a/python/mkdocs.yml +++ b/python/mkdocs.yml @@ -62,6 +62,9 @@ nav: # Will migrate to Guides in the future - Pytest Plugin: examples/pytest_plugin.md - Pytest Plugin Quickstart: examples/pytest_plugin_quickstart.md + - Guides: + - Pytest Plugin: + - Pass/Fail Behavior: guides/pytest_plugin/pass_fail_behavior.md # - Guides: # - Logging # - Error Handling From 1e7ee6f5e43bb0055ff1ca8aebb1cf6b6a3ce5ba Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 26 May 2026 10:20:29 -0700 Subject: [PATCH 5/6] update docs --- .../pytest_plugin/pass_fail_behavior.md | 82 +++++++------------ 1 file changed, 30 insertions(+), 52 deletions(-) diff --git a/python/docs/guides/pytest_plugin/pass_fail_behavior.md b/python/docs/guides/pytest_plugin/pass_fail_behavior.md index 0c9636fe6..8ec57701f 100644 --- a/python/docs/guides/pytest_plugin/pass_fail_behavior.md +++ b/python/docs/guides/pytest_plugin/pass_fail_behavior.md @@ -8,14 +8,14 @@ produce, and how that result rolls up to the parent steps and the report. The statuses below come from `sift_client.sift_types.test_report.TestStatus`. -| Status | Meaning | -| ------------- | ---------------------------------------------------------------------------------------------------- | -| `PASSED` | The step completed and every check it owns succeeded. | -| `FAILED` | An assertion, a `pytest.fail(...)`, a failed `report_outcome`, or a failing measurement marked it. | -| `ERROR` | An unexpected exception escaped the test body or a fixture (setup or teardown). | -| `ABORTED` | A hard exit (`SystemExit`, observed `KeyboardInterrupt`) interrupted the test. | -| `SKIPPED` | The test was skipped at collection time, at runtime, or from a fixture. | -| `IN_PROGRESS` | The plugin never observed a final outcome (e.g. a session-aborting interrupt killed pytest first). | +| Status | Meaning | +| ------------- |------------------------------------------------------------------------------------------------------------------------| +| `PASSED` | The step completed and every check it owns succeeded. | +| `FAILED` | An assertion, a `pytest.fail(...)`, a failed `report_outcome`, or a failing measurement marked it. | +| `ERROR` | An unexpected exception escaped the test body or a fixture (setup or teardown). | +| `ABORTED` | A hard exit (`SystemExit`, observed `KeyboardInterrupt`) interrupted the test. | +| `SKIPPED` | The test was skipped at collection time, at runtime, or from a fixture. | +| `IN_PROGRESS` | Test in progress or the plugin never observed a final outcome (e.g. a session-aborting interrupt killed pytest first). | ## Normal test outcomes @@ -27,9 +27,7 @@ The statuses below come from `sift_client.sift_types.test_report.TestStatus`. | Uncaught non-assertion exception | `raise ValueError("boom")` | `ERROR` | A non-assertion exception gets its formatted traceback recorded on -`step.error_info.error_message` (first frame plus the last ten frames). -Assertions and `pytest.fail` skip the traceback; pytest already prints one -in the runner output. +`step.error_info.error_message`. ## Hard exits @@ -45,10 +43,8 @@ session down before the plugin sees the exit, the step stays at ### Abort propagation through nested substeps -When a hard exit fires inside nested substeps, the exception unwinds the -substep stack. Every step that was open when the abort fired records -`ABORTED`. A sibling substep that closed cleanly before the abort keeps -its prior status. +Every step that was open when the abort fired records +`ABORTED`. ```python title="test_abort.py" import sys @@ -59,7 +55,7 @@ def test_x(step): pass # closes as PASSED before the abort with step.substep(name="outer_sub") as outer_sub: with outer_sub.substep(name="inner_sub"): - sys.exit(1) # ABORTED unwinds through inner_sub, outer_sub, and the test step + sys.exit(1) # ABORTED applied to inner_sub, outer_sub, and the test step ``` The Sift report shows `completed_sub` as `PASSED` and the three steps @@ -78,14 +74,6 @@ itself) as `ABORTED`. `SKIPPED` does not propagate as a failure. A skipped substep or test does not block its parent from resolving to `PASSED`. -!!! info "Collection-time skip path" - `@pytest.mark.skip` and `@pytest.mark.skipif` short-circuit before any - autouse fixture fires, so the plugin records the step from inside its - `pytest_runtest_makereport` hook rather than from the `step` fixture. - Runtime skips (`pytest.skip(...)`) and skips from inside a fixture go - through the normal fixture path, with the autouse `step` resolving to - `SKIPPED`. - ## Expected failures (xfail / xpass) xfail marks declare that a test is expected to fail. The plugin follows @@ -99,23 +87,6 @@ the same semantics pytest does. | `xfail(raises=...)` with wrong exception | `@pytest.mark.xfail(raises=ValueError)` + `raise KeyError` | `FAILED` (the `raises=` mismatch is a real test failure) | | `xfail(run=False)` | `@pytest.mark.xfail(run=False)` | `SKIPPED` (the body never ran) | -## Setup and teardown phases - -Fixture failures land on the test step the fixture was resolving for. -Setup-phase and teardown-phase failures resolve differently: - -| Scenario | Trigger | Outcome | -| -------------------------------------------- | ---------------------------------------------------------------- | ------------------------------------------------------------- | -| Setup-phase fixture failure | `@pytest.fixture` raises before `yield`; body never runs | `ERROR` | -| Teardown-phase fixture failure | `@pytest.fixture` raises after `yield`; body passed | `FAILED` (the plugin upgrades the closed step after teardown) | -| Call-phase failure plus teardown failure | `assert 1 == 2` in the body AND a `@pytest.fixture` raises after `yield` | `FAILED` (the call-phase failure dominates) | - -## Collection and fixture resolution - -| Scenario | Trigger | Outcome | -| --------------- | ---------------------------------- | ------- | -| Missing fixture | `def test_x(nonexistent_fixture):` | `ERROR` (surfaces as a setup-phase failure) | - ## Influencing outcomes from test code A test can also set the step's outcome directly via the helpers below. @@ -174,14 +145,21 @@ def test_with_substep(step): ## Propagation rules -Steps the plugin opens (function, class, module, package) and substeps -your test opens follow the same rules: - -- A step whose status is anything other than `PASSED` or `SKIPPED` - propagates failure to its parent and to the report. -- `SKIPPED` does not propagate as a failure. -- A status set explicitly via `current_step.update` is kept; the plugin - does not overwrite it. -- `error_info` is free-form diagnostic data. The step's status is the - authoritative signal; `error_info` rides along for the dashboard but - does not change how propagation behaves. +Every non-`PASSED`/`SKIPPED` step marks its parent as failed. What the +parent records depends on whether the failure raised an exception: + +- When an exception is raised, every step it passes through on its way + up records the matching status: `ABORTED` for `SystemExit` and observed + `KeyboardInterrupt`, `ERROR` for everything else. +- When the failure is recorded without raising (failed measurement, + failed `report_outcome`, manual non-PASSED status), the parent only + sees a child-failed signal and resolves to `FAILED` unless its own + logic says otherwise. + +Pytest catches the test body's exception at the test function step, so +the exception-driven chain ends there. Hierarchy parents above (class, +module, package) and the report only see the child-failed signal and +resolve to `FAILED`. + +`SKIPPED` does not propagate. A status set explicitly via +`current_step.update` is kept. From a447c853eafca58ea2e22db84edafe60d978af98 Mon Sep 17 00:00:00 2001 From: Alex Luck Date: Tue, 26 May 2026 11:07:50 -0700 Subject: [PATCH 6/6] codify more explicitly that errors are recorded as ERROR and parent steps FAIL instead of error --- .../pytest_plugin/pass_fail_behavior.md | 27 +++++---- .../pytest_plugin/_step_status_capture.py | 5 +- .../pytest_plugin/step_status_states.md | 1 + .../_tests/pytest_plugin/test_hierarchy.py | 15 ++--- .../_tests/pytest_plugin/test_pass_fail.py | 21 +++++++ python/lib/sift_client/pytest_plugin.py | 27 ++++++--- .../util/test_results/context_manager.py | 58 ++++++++++++------- 7 files changed, 101 insertions(+), 53 deletions(-) diff --git a/python/docs/guides/pytest_plugin/pass_fail_behavior.md b/python/docs/guides/pytest_plugin/pass_fail_behavior.md index 8ec57701f..6e9b1d6e3 100644 --- a/python/docs/guides/pytest_plugin/pass_fail_behavior.md +++ b/python/docs/guides/pytest_plugin/pass_fail_behavior.md @@ -146,20 +146,19 @@ def test_with_substep(step): ## Propagation rules Every non-`PASSED`/`SKIPPED` step marks its parent as failed. What the -parent records depends on whether the failure raised an exception: - -- When an exception is raised, every step it passes through on its way - up records the matching status: `ABORTED` for `SystemExit` and observed - `KeyboardInterrupt`, `ERROR` for everything else. -- When the failure is recorded without raising (failed measurement, - failed `report_outcome`, manual non-PASSED status), the parent only - sees a child-failed signal and resolves to `FAILED` unless its own - logic says otherwise. - -Pytest catches the test body's exception at the test function step, so -the exception-driven chain ends there. Hierarchy parents above (class, -module, package) and the report only see the child-failed signal and -resolve to `FAILED`. +parent records depends on whether its own scope had an abort and whether +a child already failed: + +- A hard exit (`SystemExit` or an observed `KeyboardInterrupt`) in the + step's own scope records `ABORTED`. `ABORTED` propagates through every + step the abort passes through on its way up. +- A child that already recorded a non-`PASSED`/`SKIPPED` outcome marks + the parent as `FAILED`. This holds whether or not an exception is still + propagating through the parent's scope: only the originating substep + records `ERROR`; ancestors inherit `FAILED`. The traceback stays on + the originating step's `error_info`. +- A step records `ERROR` only when its own scope raised a non-Assertion + exception AND no child has failed. `SKIPPED` does not propagate. A status set explicitly via `current_step.update` is kept. diff --git a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py index dd8e8c05a..e92d1726e 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py +++ b/python/lib/sift_client/_tests/pytest_plugin/_step_status_capture.py @@ -11,11 +11,14 @@ import json from dataclasses import dataclass, field -from pathlib import Path +from typing import TYPE_CHECKING from sift_client._internal.low_level_wrappers._test_results_log import iter_log_data_lines from sift_client.sift_types.test_report import TestStatus +if TYPE_CHECKING: + from pathlib import Path + @dataclass class CapturedStep: diff --git a/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md b/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md index f98d13fee..7e366a512 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md +++ b/python/lib/sift_client/_tests/pytest_plugin/step_status_states.md @@ -37,6 +37,7 @@ be traced back to its row here without rereading the scenario: | `CALL-04` | `pytest.fail("...")` from body | `pytest.fail("intentional failure")` | `FAILED` | | `CALL-05` | `SystemExit` from the test body | `sys.exit(1)` | `ABORTED` | | `CALL-06` | `KeyboardInterrupt` in body | `raise KeyboardInterrupt` | `IN_PROGRESS` — session aborts before the plugin sees the interrupt; `ABORTED` if the plugin does see it | +| `CALL-07` | Substep raises non-Assertion exception | `with step.substep(...): raise ValueError("boom")` | Substep `ERROR`, test step `FAILED` (child-failed signal outranks the propagating exception) | ## Skip paths diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py index 866471ce7..1efd4e817 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_hierarchy.py @@ -63,9 +63,7 @@ def _ancestor_names(steps: list[dict], leaf: dict) -> list[str]: return chain -def test_class_methods_cluster_under_class_step( - pytester: pytest.Pytester, log_file: Path -) -> None: +def test_class_methods_cluster_under_class_step(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_klass=dedent( """ @@ -456,16 +454,15 @@ def test_b(self): def _write_ini(pytester: pytest.Pytester, log_file: Path, **overrides: object) -> None: """Write a pytest.ini with the given sift_* overrides, preserving the - offline/log/git-metadata defaults the ``log_file`` fixture installs.""" + offline/log/git-metadata defaults the ``log_file`` fixture installs. + """ lines = _base_ini_lines(log_file) for key, value in overrides.items(): lines.append(f"{key} = {value}") pytester.makefile(".ini", pytest="\n".join(lines) + "\n") -def test_sift_class_step_false_skips_class_steps( - pytester: pytest.Pytester, log_file: Path -) -> None: +def test_sift_class_step_false_skips_class_steps(pytester: pytest.Pytester, log_file: Path) -> None: _write_ini(pytester, log_file, sift_class_step="false") pytester.makepyfile( test_noclass=dedent( @@ -845,9 +842,7 @@ def test_two(w): assert by_name["test_two"][0]["parent_step_id"] == mod_b["id"] -def test_leaf_parent_chain_terminates_at_report( - pytester: pytest.Pytester, log_file: Path -) -> None: +def test_leaf_parent_chain_terminates_at_report(pytester: pytest.Pytester, log_file: Path) -> None: pytester.makepyfile( test_chain=dedent( """ diff --git a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py index a950cdb7d..0e1540ce7 100644 --- a/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py +++ b/python/lib/sift_client/_tests/pytest_plugin/test_pass_fail.py @@ -153,6 +153,27 @@ def test_x(): assert outer.statuses[-1] == TestStatus.IN_PROGRESS +def test_substep_exception_records_error_with_failed_parent(inner): + # Case: CALL-07 + _run( + inner, + """ + def test_x(step): + with step.substep(name="inner"): + raise ValueError("boom") + """, + ) + # Only the originating substep records ERROR. The test step inherits the + # child-failed signal and resolves to FAILED, even though the same + # ValueError propagated through its scope. + inner_sub = next(iter(capture.steps_by_name("inner")), None) + test_x = capture.test_step("test_x") + assert inner_sub is not None + assert test_x is not None + assert inner_sub.statuses[-1] == TestStatus.ERROR + assert test_x.statuses[-1] == TestStatus.FAILED + + # --------------------------------------------------------------------------- # Skip paths # --------------------------------------------------------------------------- diff --git a/python/lib/sift_client/pytest_plugin.py b/python/lib/sift_client/pytest_plugin.py index 68304cbf7..c3b303ac8 100644 --- a/python/lib/sift_client/pytest_plugin.py +++ b/python/lib/sift_client/pytest_plugin.py @@ -527,6 +527,11 @@ def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: test's own status or substep failures should drive the result), this leaves the step alone so the default ``__exit__`` resolution stays in charge. """ + current_step = new_step.current_step + if current_step is None: + # The step never opened (the autouse fixture short-circuited or was + # disabled). Nothing to resolve. + return setup_phase = getattr(item, "_sift_phase_setup", None) call_phase = getattr(item, "_sift_phase_call", None) xfail_marker = item.get_closest_marker("xfail") @@ -576,6 +581,9 @@ def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: status = TestStatus.SKIPPED elif call_phase.report.outcome == "failed": excinfo = call_phase.call.excinfo + children_passed = new_step.report_context.open_step_results.get( + current_step.step_path, True + ) if excinfo is None: status = TestStatus.FAILED elif isinstance(excinfo.value, AssertionError): @@ -592,6 +600,10 @@ def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: # xfail(raises=X) with a non-matching exception: the contract failed. status = TestStatus.FAILED error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) + elif not children_passed: + # A substep already recorded the error and carries the traceback; + # the test step only inherits the child-failed signal. + status = TestStatus.FAILED else: status = TestStatus.ERROR error_info = format_truncated_traceback(excinfo.type, excinfo.value, excinfo.tb) @@ -599,12 +611,11 @@ def _resolve_initial_status(new_step: NewStep, item: pytest.Item) -> None: if status is None and not keep_managed: return - assert new_step.current_step is not None if status is not None: # BaseType is frozen; mutate via __dict__ the same way _apply_client_to_instance does. - new_step.current_step.__dict__["status"] = status + current_step.__dict__["status"] = status if error_info is not None: - new_step.current_step.__dict__["error_info"] = error_info + current_step.__dict__["error_info"] = error_info new_step._sift_managed_externally = True @@ -618,10 +629,12 @@ def _finalize_after_teardown(item: pytest.Item, teardown_report: pytest.TestRepo step: NewStep | None = getattr(item, "_sift_step", None) if step is None: return - assert step.current_step is not None - if teardown_report.outcome == "failed" and step.current_step.status == TestStatus.PASSED: - step.current_step.update({"status": TestStatus.FAILED}) - step.report_context.mark_step_failed_after_close(step.current_step) + current_step = step.current_step + if current_step is None: + return + if teardown_report.outcome == "failed" and current_step.status == TestStatus.PASSED: + current_step.update({"status": TestStatus.FAILED}) + step.report_context.mark_step_failed_after_close(current_step) @pytest.hookimpl(tryfirst=True, hookwrapper=True) diff --git a/python/lib/sift_client/util/test_results/context_manager.py b/python/lib/sift_client/util/test_results/context_manager.py index 5fd090bfc..3454ef5e2 100644 --- a/python/lib/sift_client/util/test_results/context_manager.py +++ b/python/lib/sift_client/util/test_results/context_manager.py @@ -421,6 +421,10 @@ class NewStep(AbstractContextManager): client: SiftClient assertion_as_fail_not_error: bool = True current_step: TestStep | None = None + # Set by the pytest plugin's ``_resolve_initial_status`` to signal that + # status was already resolved upstream and ``__exit__`` should skip + # re-classifying. Read via ``getattr`` so unset is treated as False. + _sift_managed_externally: bool = False def __init__( self, @@ -485,13 +489,20 @@ def update_step_from_result( returns: The false if step failed or errored, true otherwise. """ + current_step = self.current_step + if current_step is None: + # The step was never opened; nothing to resolve. Treat as a pass + # so callers that branch on the return value don't see a spurious + # failure. + return True + error_info = None aborted = False - assert self.current_step is not None + errored = False if exc: if isinstance(exc_value, AssertionError) and not self.assertion_as_fail_not_error: # If we're not showing assertion errors (i.e. pytest), mark step as failed but don't set error info. - self.report_context.record_step_outcome(False, self.current_step) + self.report_context.record_step_outcome(False, current_step) elif isinstance(exc_value, (KeyboardInterrupt, SystemExit)): # Hard exit propagating through the substep stack: record as # ABORTED so every in-progress step on the way out reflects @@ -499,30 +510,34 @@ def update_step_from_result( aborted = True error_info = format_truncated_traceback(exc, exc_value, tb) else: + errored = True error_info = format_truncated_traceback(exc, exc_value, tb) # Status is the governor: anything other than IN_PROGRESS was set # deliberately (manual override, plugin pre-resolution, etc.) and must # not be silently overwritten by side-channel signals. When the step is - # still IN_PROGRESS, fill it in: hard-exit aborts first, then captured - # exceptions, then the substep-driven pass/fail read from - # open_step_results. - status = self.current_step.status + # still IN_PROGRESS, resolve from independent state: aborts first, then + # a child-failed signal (parents inherit FAILED, not the originating + # ERROR), then the step's own captured exception, then the children-pass + # default. error_info is diagnostic and never drives status. + status = current_step.status if status == TestStatus.IN_PROGRESS: + children_passed = self.report_context.open_step_results.get( + current_step.step_path, True + ) if aborted: status = TestStatus.ABORTED - elif error_info: + elif not children_passed: + status = TestStatus.FAILED + elif errored: status = TestStatus.ERROR else: - children_passed = self.report_context.open_step_results.get( - self.current_step.step_path, True - ) - status = TestStatus.PASSED if children_passed else TestStatus.FAILED + status = TestStatus.PASSED # Propagate based on the resolved status; error_info rides along as # pure diagnostics and does not affect propagation. - result = self.report_context.propagate_step_result(self.current_step, status) - self.current_step.update( + result = self.report_context.propagate_step_result(current_step, status) + current_step.update( { "status": status, "end_time": datetime.now(timezone.utc), @@ -538,18 +553,19 @@ def __exit__(self, exc, exc_value, tb): # Propagate based on that resolved status, emit one update_step # with the resolved values, and pop from the stack without # re-classifying. - assert self.current_step is not None - result = self.report_context.propagate_step_result( - self.current_step, self.current_step.status - ) - self.current_step.update( + current_step = self.current_step + if current_step is None: + # The step was never opened; nothing to propagate. + return True + result = self.report_context.propagate_step_result(current_step, current_step.status) + current_step.update( { - "status": self.current_step.status, + "status": current_step.status, "end_time": datetime.now(timezone.utc), - "error_info": self.current_step.error_info, + "error_info": current_step.error_info, }, ) - self.report_context.exit_step(self.current_step) + self.report_context.exit_step(current_step) if hasattr(self, "force_result"): result = self.force_result return result