diff --git a/aws/lambda/pytorch-auto-revert/SIGNAL_EXTRACTION.md b/aws/lambda/pytorch-auto-revert/SIGNAL_EXTRACTION.md index 65d07bf1f1..bc9036df23 100644 --- a/aws/lambda/pytorch-auto-revert/SIGNAL_EXTRACTION.md +++ b/aws/lambda/pytorch-auto-revert/SIGNAL_EXTRACTION.md @@ -82,6 +82,22 @@ Notes - Each commit holds a list of `SignalEvent`s (time‑ordered by `started_at`). Ordering: dicts in Python 3.7+ preserve insertion order. Phase A inserts commit keys in push‑timestamp DESC order, so iterating the mapping yields newest→older commits without extra sorting. +### Test‑track semantics +- Source of truth for SUCCESS/FAILURE is `default.test_run_s3` per test id. +- When a test row exists for an attempt: + - Emit at most one FAILURE if any failed runs exist; at most one SUCCESS if any successful runs exist. +- When no test rows exist for an attempt and any grouped job for that attempt is pending → emit PENDING. +- Otherwise (no test rows and not pending) → no event for that attempt. + +### Job‑track semantics (non‑test) +- Build per normalized job base across commits; aggregate shards by `(wf_run_id, run_attempt)`. +- Event mapping per attempt uses aggregated job meta with test‑failure filtering: + - FAILURE only when the attempt had non‑test failures (e.g. infra‑related). + - PENDING when the attempt is still running. + - SUCCESS otherwise, including when failures are exclusively test‑caused (these are handled by test‑track). +- Cancelled attempts are treated as missing (no event). +- Emit a job‑track Signal only when at least one attempt/commit shows a non‑test (infra) failure within the window. + Event naming (for debuggability): - Consistent key=value format: `wf= kind= id= run= attempt=` - Examples: diff --git a/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/signal_extraction.py b/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/signal_extraction.py index 5a1d97df2a..de9c5f2eb5 100644 --- a/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/signal_extraction.py +++ b/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/signal_extraction.py @@ -482,16 +482,15 @@ def _build_non_test_signals( # Map aggregation verdict to outer SignalStatus if meta.status is None: continue - if meta.status == AggStatus.FAILURE: + if meta.status == AggStatus.FAILURE and meta.has_non_test_failures: # mark presence of non-test failures (relevant for job track) - if meta.has_non_test_failures: - has_relevant_failures = True - + has_relevant_failures = True ev_status = SignalStatus.FAILURE - elif meta.status == AggStatus.SUCCESS: - ev_status = SignalStatus.SUCCESS - else: + elif meta.status == AggStatus.PENDING: ev_status = SignalStatus.PENDING + else: + # Note: when all failures are caused by tests, we do NOT emit job-level failures + ev_status = SignalStatus.SUCCESS # Extract wf_run_id/run_attempt from the attempt key _, _, _, wf_run_id, run_attempt = akey diff --git a/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/tests/test_signal_extraction.py b/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/tests/test_signal_extraction.py index 823aca57f9..fae6693ac6 100644 --- a/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/tests/test_signal_extraction.py +++ b/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/tests/test_signal_extraction.py @@ -319,6 +319,46 @@ def test_non_test_inclusion_gate(self): self._find_job_signal(signals_b, "trunk", jobs_b[0].base_name) ) + def test_job_track_treats_test_failures_as_success(self): + # When a base has a non-test (infra) failure somewhere (so a job signal is emitted), + # attempts that fail due to tests should NOT appear as FAILURES in the job track. + # They should be treated as SUCCESS at the job-track level, leaving the failure to test-track. + jobs = [ + # Newer commit: infra-caused failure (non-test classification) + J( + sha="Z2", + run=9100, + job=801, + attempt=1, + started_at=ts(self.t0, 20), + conclusion="failure", + rule="infra", # non-test + ), + # Older commit: failure caused by tests (test classification) + J( + sha="Z1", + run=9000, + job=800, + attempt=1, + started_at=ts(self.t0, 10), + conclusion="failure", + rule="pytest failure", # test-caused + ), + ] + + signals = self._extract(jobs, tests=[]) + base = jobs[0].base_name + sig = self._find_job_signal(signals, "trunk", base) + self.assertIsNotNone(sig) + # Expect commits newest->older + self.assertEqual([c.head_sha for c in sig.commits], ["Z2", "Z1"]) + # Newer infra failure remains FAILURE + self.assertEqual(len(sig.commits[0].events), 1) + self.assertEqual(sig.commits[0].events[0].status, SignalStatus.FAILURE) + # Older test-caused failure is mapped to SUCCESS in job track + self.assertEqual(len(sig.commits[1].events), 1) + self.assertEqual(sig.commits[1].events[0].status, SignalStatus.SUCCESS) + def test_commits_without_jobs_are_included(self): # Verify that commits with no jobs at all are still included in signals # Simulate case where C2 has a failure, C3 has no jobs (e.g., periodic workflow),