From e754847fd26c55c6138f1e0dfd22a40132932486 Mon Sep 17 00:00:00 2001 From: Maple Xu Date: Fri, 8 May 2026 15:02:41 -0400 Subject: [PATCH 1/2] DO NOT MERGE: stress-test langsmith flake fix (1000x per matrix) Temporary experiment branch to validate that removing max_cached_workflows=0 from test_temporal_prefixed_query_not_traced fixes the Windows 3.10 deadlock flake. Changes: - tests/contrib/langsmith/test_integration.py: parametrize the test with range(1000) so each matrix entry runs it 1000x via pytest-xdist; remove max_cached_workflows=0 from the worker config. - .github/workflows/ci.yml (build-lint-test job only): - Run only the parametrized test with -p no:rerunfailures so a single failure is not hidden by retries. - Disable poe lint, poe bridge-lint, the time-skipping test step, and the openai test step for this branch. - Bump job timeout to 60 min. - Set the step timeout to 30 min for the stress test. Success = 0 failures across 10,000 invocations (10 matrix entries x 1000). Any single failure means the cache=0 removal is not sufficient and a deeper fix is needed. This branch will be discarded once CI confirms the result. The actual fix (test change only, no CI change) will go on a separate branch/PR. 
--- .github/workflows/ci.yml | 33 +++++++++++---------- tests/contrib/langsmith/test_integration.py | 4 ++- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6294f6d9a..1234753b9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,7 +16,7 @@ env: jobs: # Build and test the project build-lint-test: - timeout-minutes: 30 + timeout-minutes: 60 strategy: fail-fast: false matrix: @@ -58,22 +58,25 @@ jobs: - uses: astral-sh/setup-uv@cec208311dfd045dd5311c1add060b2062131d57 # v8 - run: uv tool install poethepoet - run: uv sync --all-extras - - run: poe bridge-lint - if: ${{ matrix.clippyLinter }} + # [STRESS TEST BRANCH] bridge-lint disabled + # - run: poe bridge-lint + # if: ${{ matrix.clippyLinter }} - run: poe build-develop - - run: poe lint + # [STRESS TEST BRANCH] poe lint disabled + # - run: poe lint - run: mkdir junit-xml - - run: poe test ${{matrix.pytestExtraArgs}} -s --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}.xml - timeout-minutes: 15 - # Time skipping doesn't yet support ARM - - if: ${{ !endsWith(matrix.os, '-arm') }} - run: poe test ${{matrix.pytestExtraArgs}} -s --workflow-environment time-skipping --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}--time-skipping.xml - timeout-minutes: 10 - - if: ${{ matrix.openaiTestTarget && (github.event.pull_request.head.repo.full_name == '' || github.event.pull_request.head.repo.full_name == 'temporalio/sdk-python') }} - run: poe test tests/contrib/openai_agents/test_openai.py ${{matrix.pytestExtraArgs}} -s --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}--openai.xml - timeout-minutes: 10 - env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + # [STRESS TEST BRANCH] Run only the flaky test 1000x with no rerun hiding + - run: uv run pytest tests/contrib/langsmith/test_integration.py::TestBuiltinQueryFiltering::test_temporal_prefixed_query_not_traced -n auto --dist=worksteal -p 
no:rerunfailures -s --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}.xml + timeout-minutes: 30 + # [STRESS TEST BRANCH] Time-skipping and openai steps disabled for this branch + # - if: ${{ !endsWith(matrix.os, '-arm') }} + # run: poe test ${{matrix.pytestExtraArgs}} -s --workflow-environment time-skipping --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}--time-skipping.xml + # timeout-minutes: 10 + # - if: ${{ matrix.openaiTestTarget && (github.event.pull_request.head.repo.full_name == '' || github.event.pull_request.head.repo.full_name == 'temporalio/sdk-python') }} + # run: poe test tests/contrib/openai_agents/test_openai.py ${{matrix.pytestExtraArgs}} -s --junit-xml=junit-xml/${{ matrix.python }}--${{ matrix.os }}--openai.xml + # timeout-minutes: 10 + # env: + # OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - name: "Upload junit-xml artifacts" uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 if: always() diff --git a/tests/contrib/langsmith/test_integration.py b/tests/contrib/langsmith/test_integration.py index 78d48c71e..5ea5fe912 100644 --- a/tests/contrib/langsmith/test_integration.py +++ b/tests/contrib/langsmith/test_integration.py @@ -1224,9 +1224,11 @@ def my_query(self) -> str: class TestBuiltinQueryFiltering: """Verifies __temporal_ prefixed queries are not traced.""" + @pytest.mark.parametrize("_iteration", range(1000)) async def test_temporal_prefixed_query_not_traced( self, client: Client, + _iteration: int, ) -> None: """__temporal_workflow_metadata query should not produce a trace, but user-defined queries should still be traced. @@ -1235,6 +1237,7 @@ async def test_temporal_prefixed_query_not_traced( client-side QueryWorkflow traces, isolating the test to worker-side HandleQuery traces only. 
""" + del _iteration task_queue = f"query-filter-{uuid.uuid4()}" collector = InMemoryRunCollector() @@ -1249,7 +1252,6 @@ async def test_temporal_prefixed_query_not_traced( worker_client, QueryFilteringWorkflow, task_queue=task_queue, - max_cached_workflows=0, ) as worker: handle = await query_client.start_workflow( QueryFilteringWorkflow.run, From ea8bc4ea5911678f3c8bd2e43912cb2644cc3062 Mon Sep 17 00:00:00 2001 From: Maple Xu Date: Fri, 8 May 2026 16:47:56 -0400 Subject: [PATCH 2/2] =?UTF-8?q?DO=20NOT=20MERGE:=20negative=20control=20?= =?UTF-8?q?=E2=80=94=20restore=20max=5Fcached=5Fworkflows=3D0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore `max_cached_workflows=0` on the parametrized 1000x stress test to verify the hypothesis from the other direction: with cache=0 in place, Windows 3.10 should reproduce the [TMPRL1101] deadlock flake within 1000 iterations. Previous run (cache=0 removed) passed 1000/1000 on every matrix entry. Expectation: this run will fail on Windows 3.10. Co-Authored-By: Claude Opus 4.7 (1M context) --- tests/contrib/langsmith/test_integration.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/contrib/langsmith/test_integration.py b/tests/contrib/langsmith/test_integration.py index 5ea5fe912..8134adfca 100644 --- a/tests/contrib/langsmith/test_integration.py +++ b/tests/contrib/langsmith/test_integration.py @@ -1252,6 +1252,7 @@ async def test_temporal_prefixed_query_not_traced( worker_client, QueryFilteringWorkflow, task_queue=task_queue, + max_cached_workflows=0, ) as worker: handle = await query_client.start_workflow( QueryFilteringWorkflow.run,