pytorch · izaitsevfb · Oct 22, 2025 · Oct 22, 2025
diff --git a/aws/lambda/pytorch-auto-revert/SIGNAL_EXTRACTION.md b/aws/lambda/pytorch-auto-revert/SIGNAL_EXTRACTION.md
@@ -82,6 +82,22 @@ Notes
 - Each commit holds a list of `SignalEvent`s (time‑ordered by `started_at`).
   Ordering: dicts in Python 3.7+ preserve insertion order. Phase A inserts commit keys in push‑timestamp DESC order, so iterating the mapping yields newest→older commits without extra sorting.
 
+### Test‑track semantics
+- Source of truth for SUCCESS/FAILURE is `default.test_run_s3` per test id.
+- When a test row exists for an attempt:
+  - Emit at most one FAILURE if any failed runs exist; at most one SUCCESS if any successful runs exist.
+- When no test rows exist for an attempt and any grouped job for that attempt is pending → emit PENDING.
+- Otherwise (no test rows and not pending) → no event for that attempt.
+
+### Job‑track semantics (non‑test)
+- Build per normalized job base across commits; aggregate shards by `(wf_run_id, run_attempt)`.
+- Event mapping per attempt uses aggregated job meta with test‑failure filtering:
+  - FAILURE only when the attempt had non‑test failures (e.g. infra‑related).
+  - PENDING when the attempt is still running.
+  - SUCCESS otherwise, including when failures are exclusively test‑caused (these are handled by test‑track).
+- Cancelled attempts are treated as missing (no event).
+- Emit a job‑track Signal only when at least one attempt/commit shows a non‑test (infra) failure within the window.
+
 Event naming (for debuggability):
 - Consistent key=value format: `wf=<workflow> kind=<test|job> id=<test_id|job_base> run=<wf_run_id> attempt=<run_attempt>`
 - Examples:

diff --git a/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/signal_extraction.py b/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/signal_extraction.py
@@ -482,16 +482,15 @@ def _build_non_test_signals(
                     # Map aggregation verdict to outer SignalStatus
                     if meta.status is None:
                         continue
-                    if meta.status == AggStatus.FAILURE:
+                    if meta.status == AggStatus.FAILURE and meta.has_non_test_failures:
                         # mark presence of non-test failures (relevant for job track)
-                        if meta.has_non_test_failures:
-                            has_relevant_failures = True
-
+                        has_relevant_failures = True
                         ev_status = SignalStatus.FAILURE
-                    elif meta.status == AggStatus.SUCCESS:
-                        ev_status = SignalStatus.SUCCESS
-                    else:
+                    elif meta.status == AggStatus.PENDING:
                         ev_status = SignalStatus.PENDING
+                    else:
+                        # Note: when all failures are caused by tests, we do NOT emit job-level failures
+                        ev_status = SignalStatus.SUCCESS
 
                     # Extract wf_run_id/run_attempt from the attempt key
                     _, _, _, wf_run_id, run_attempt = akey

diff --git a/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/tests/test_signal_extraction.py b/aws/lambda/pytorch-auto-revert/pytorch_auto_revert/tests/test_signal_extraction.py
@@ -319,6 +319,46 @@ def test_non_test_inclusion_gate(self):
             self._find_job_signal(signals_b, "trunk", jobs_b[0].base_name)
         )
 
+    def test_job_track_treats_test_failures_as_success(self):
+        # When a base has a non-test (infra) failure somewhere (so a job signal is emitted),
+        # attempts that fail due to tests should NOT appear as FAILURES in the job track.
+        # They should be treated as SUCCESS at the job-track level, leaving the failure to test-track.
+        jobs = [
+            # Newer commit: infra-caused failure (non-test classification)
+            J(
+                sha="Z2",
+                run=9100,
+                job=801,
+                attempt=1,
+                started_at=ts(self.t0, 20),
+                conclusion="failure",
+                rule="infra",  # non-test
+            ),
+            # Older commit: failure caused by tests (test classification)
+            J(
+                sha="Z1",
+                run=9000,
+                job=800,
+                attempt=1,
+                started_at=ts(self.t0, 10),
+                conclusion="failure",
+                rule="pytest failure",  # test-caused
+            ),
+        ]
+
+        signals = self._extract(jobs, tests=[])
+        base = jobs[0].base_name
+        sig = self._find_job_signal(signals, "trunk", base)
+        self.assertIsNotNone(sig)
+        # Expect commits newest->older
+        self.assertEqual([c.head_sha for c in sig.commits], ["Z2", "Z1"])
+        # Newer infra failure remains FAILURE
+        self.assertEqual(len(sig.commits[0].events), 1)
+        self.assertEqual(sig.commits[0].events[0].status, SignalStatus.FAILURE)
+        # Older test-caused failure is mapped to SUCCESS in job track
+        self.assertEqual(len(sig.commits[1].events), 1)
+        self.assertEqual(sig.commits[1].events[0].status, SignalStatus.SUCCESS)
+
     def test_commits_without_jobs_are_included(self):
         # Verify that commits with no jobs at all are still included in signals
         # Simulate case where C2 has a failure, C3 has no jobs (e.g., periodic workflow),