Changes from all commits (32 commits)
3 changes: 2 additions & 1 deletion .gitignore
@@ -34,7 +34,8 @@ operator_ui/install
.devenv
event_dump.ndjson
.cursor/
.claude/
.claude/*
!/tools/test/.claude/skills/

# neovim
.nvim.lua
11 changes: 10 additions & 1 deletion README.md
@@ -239,7 +239,7 @@ go test ./...
#### Notes

- The `parallel` flag can be used to limit CPU usage when running tests in the background (`-parallel=4`) - the default is `GOMAXPROCS`
- The `p` flag can be used to limit the number of _packages_ tested concurrently, if they are interferring with one another (`-p=1`)
- The `p` flag can be used to limit the number of _packages_ tested concurrently, if they are interfering with one another (`-p=1`)
- The `-short` flag skips tests which depend on the database, for quickly spot-checking simpler tests in around one minute
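
These are standard `go test` flags and can be combined, e.g.:

```sh
# Serialize packages, cap per-package parallelism, and skip DB-backed tests
go test -short -p=1 -parallel=4 ./...
```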

#### Race Detector
@@ -271,6 +271,15 @@ go test ./pkg/path -run=XXX -fuzz=FuzzTestName

https://go.dev/doc/fuzz/

#### New Flow

There's a new, simpler flow for running tests at [tools/test](./tools/test/README.md). It also includes a `diagnose` command, and an [AI skill](./tools/test/.agents/skills/chainlink-test-diagnosis/SKILL.md) for diagnosing and fixing unstable or slow tests.

```sh
# From the /chainlink root
go -C tools/test run . -h # Command help
```
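
The text above also names a `diagnose` command; assuming it is a subcommand of the same tool with conventional flag handling, its help should be reachable the same way:

```sh
# From the /chainlink root; subcommand name per the text above, flags assumed
go -C tools/test run . diagnose -h
```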

### Go Modules

This repository contains three Go modules:
30 changes: 26 additions & 4 deletions core/internal/testutils/testutils.go
@@ -86,17 +86,39 @@ func RandomizeName(n string) string {
// DefaultWaitTimeout is the default wait timeout. If you have a *testing.T, use WaitTimeout instead.
const DefaultWaitTimeout = 30 * time.Second

// WaitTimeout returns a timeout based on the test's Deadline, if available.
// deadlineRemainingBudget returns ~90% of time until the test deadline, or false if none.
func deadlineRemainingBudget(t *testing.T) (time.Duration, bool) {
if d, ok := t.Deadline(); ok {
return time.Until(d) * 9 / 10, true // 10% buffer for cleanup
}
return 0, false
}

// WaitTimeout returns a timeout capped by the test's Deadline, if available.
// Especially important to use in parallel tests, as their individual execution
// can get paused for arbitrary amounts of time.
//
// When a deadline exists, it uses the full remaining budget (90% of time until the
// deadline), not [DefaultWaitTimeout], so long-running tests still get enough wall
// clock under package timeouts.
func WaitTimeout(t *testing.T) time.Duration {
if d, ok := t.Deadline(); ok {
// 10% buffer for cleanup and scheduling delay
return time.Until(d) * 9 / 10
if budget, ok := deadlineRemainingBudget(t); ok {
return budget
}
return DefaultWaitTimeout
}

// WaitTimeoutCustom uses the requested duration when there is no test deadline.
// When the test has a deadline, it returns the lesser of the requested duration and
// the remaining budget (90% of time until deadline), so callers can ask for e.g. 5m
// without exceeding the test process deadline.
func WaitTimeoutCustom(t *testing.T, requested time.Duration) time.Duration {
if budget, ok := deadlineRemainingBudget(t); ok {
return min(budget, requested)
}
return requested
}

Review thread on WaitTimeoutCustom:

jmank88 (Contributor), May 5, 2026:
Doesn't this change the logic of WaitTimeout? It used to always prefer 90% no matter how that compared to the default. Now it would return the default instead if that were shorter, but that will artificially limit tests that we want to use 90%.

Author (Collaborator):
Ya. My thought was "let's force people to use cleaner timeouts" but I'm realizing that's pretty heavy-handed and unrealistic in this case.

jmank88 (Contributor):
I think it would re-introduce a lot of chaos. Switching from static, artificially short timeouts over to these dynamic extended ones was a big win originally. I think we've only increased concurrency and non-determinism since then.

jmank88 (Contributor):
I see that logic is still in this version. Why is it better to quit early, when the suite is configured to run longer? Scaling to that limit is a feature - e.g. what if I have an older machine? Am I expected to edit the code to get tests to pass?
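
To make the helpers' semantics concrete, here is a minimal usage sketch; the test and the `checkReady`/`checkSynced` poll conditions are hypothetical:

```go
package testutils

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

// checkReady and checkSynced are hypothetical poll conditions.
func checkReady() bool  { return true }
func checkSynced() bool { return true }

func TestSlowSubsystem(t *testing.T) {
	t.Parallel()
	// WaitTimeout scales with `go test -timeout`: ~90% of the remaining
	// deadline, or DefaultWaitTimeout (30s) when no deadline is set.
	require.Eventually(t, checkReady, WaitTimeout(t), time.Second)
	// WaitTimeoutCustom requests 5m but is capped by the remaining budget,
	// so it cannot outlive the test process deadline.
	require.Eventually(t, checkSynced, WaitTimeoutCustom(t, 5*time.Minute), time.Second)
}
```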

// Context returns a context with the test's deadline, if available.
// Deprecated: use [testing.TB.Context] directly
func Context(tb testing.TB) context.Context {
38 changes: 38 additions & 0 deletions core/internal/testutils/testutils_test.go
@@ -0,0 +1,38 @@
package testutils

import (
"testing"
"time"

"github.com/stretchr/testify/require"
)

func TestWaitTimeoutUsesDeadlineBudgetNotDefaultCap(t *testing.T) {
if d, ok := t.Deadline(); ok {
expectBefore := time.Until(d) * 9 / 10
got := WaitTimeout(t)
expectAfter := time.Until(d) * 9 / 10
require.Greater(t, got, time.Duration(0))
// WaitTimeout evaluates time.Until(deadline) between these two snapshots, so got must fall between them
require.GreaterOrEqual(t, got, expectAfter)
require.LessOrEqual(t, got, expectBefore)
} else {
require.Equal(t, DefaultWaitTimeout, WaitTimeout(t))
}
}

func TestWaitTimeoutCustom(t *testing.T) {
requested := 10 * time.Second

if d, ok := t.Deadline(); ok {
expectBefore := time.Until(d) * 9 / 10
got := WaitTimeoutCustom(t, requested)
expectAfter := time.Until(d) * 9 / 10
require.Greater(t, got, time.Duration(0))
require.LessOrEqual(t, got, requested)
require.GreaterOrEqual(t, got, min(expectAfter, requested))
require.LessOrEqual(t, got, min(expectBefore, requested))
} else {
require.Equal(t, requested, WaitTimeoutCustom(t, requested))
}
}
10 changes: 9 additions & 1 deletion core/logger/internal/colortest/prettyconsole_test.go
@@ -1,18 +1,26 @@
package colortest

import (
"strings"
"testing"

"github.com/fatih/color"
"github.com/stretchr/testify/assert"

"github.com/smartcontractkit/chainlink/v2/core/logger"
)

func init() {
func requireANSIColor(t *testing.T) {
t.Helper()
logger.InitColor(true)
if !strings.Contains(color.New(color.FgRed).Sprint("x"), "\x1b[31m") {
t.Skip("ANSI color output is disabled in this terminal")
}
}

func TestPrettyConsole_Write(t *testing.T) {
requireANSIColor(t)

tests := []struct {
name string
input string
6 changes: 2 additions & 4 deletions core/services/llo/telem/sampling.go
@@ -87,9 +87,7 @@ func (s *sampler) StartPruningLoop(ctx context.Context, wg *sync.WaitGroup) {
return
}

wg.Add(1)
go func() {
defer wg.Done()
wg.Go(func() {
t := time.NewTicker(s.prunePeriod)
defer t.Stop()

@@ -101,7 +99,7 @@ func (s *sampler) StartPruningLoop(ctx context.Context, wg *sync.WaitGroup) {
return
}
}
}()
})
}

// pruneStorage removes all records which are older than a predefined period (s.prunePeriod).
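For context on the change above: `(*sync.WaitGroup).Go`, added in Go 1.25, wraps the manual Add/Done/go pattern on the deleted lines. A minimal sketch:

```go
package main

import "sync"

func main() {
	var wg sync.WaitGroup
	// wg.Go(f) is equivalent to:
	//   wg.Add(1)
	//   go func() { defer wg.Done(); f() }()
	wg.Go(func() {
		// background work goes here
	})
	wg.Wait() // blocks until every wg.Go callback returns
}
```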
52 changes: 21 additions & 31 deletions core/services/llo/telem/sampling_test.go
@@ -6,6 +6,7 @@ import (
"fmt"
"sync"
"testing"
"testing/synctest"
"time"

"github.com/stretchr/testify/assert"
@@ -115,12 +116,13 @@ func TestSample(t *testing.T) {
t.Parallel()

lggr := logger.TestSugared(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
ctx := t.Context()
samplr := newSampler(lggr, true)
samplr.StartPruningLoop(ctx, &sync.WaitGroup{})

t0 := time.Now()
// Truncate to second boundary so both messages map to the same second regardless of wall-clock timing.
// Without truncation, t0 near a second boundary causes t0+50ms to fall in the next second, defeating deduplication.
t0 := time.Now().Truncate(time.Second)
msg0 := &llo.LLOOutcomeTelemetry{
DonId: 2,
ConfigDigest: []byte("digest"),
@@ -145,8 +147,7 @@ func TestPruningLoop(t *testing.T) {
t.Parallel()

lggr := logger.TestSugared(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
ctx := t.Context()

samplr := newSampler(lggr, true)
// We need a prune time of at least one second in order to detect outdated entries.
@@ -194,34 +195,23 @@ func TestPruningLoop_Exits(t *testing.T) {
func TestPruningLoop_Exits(t *testing.T) {
t.Parallel()

lggr := logger.TestSugared(t)
ctx, cancel := context.WithCancel(context.Background())
wg := &sync.WaitGroup{}
synctest.Test(t, func(t *testing.T) {
lggr := logger.TestSugared(t)
ctx, cancel := context.WithCancel(context.Background())
wg := new(sync.WaitGroup)

// Start the sampler and its loop. This increments the waitgroup's counter.
samplr := newSampler(lggr, true)
samplr.StartPruningLoop(ctx, wg)
// Start the sampler and its loop. This increments the waitgroup's counter.
samplr := newSampler(lggr, true)
samplr.StartPruningLoop(ctx, wg)

// Create a channel which will be closed when the waitgroup is done, i.e. when the loop is closed.
ch := make(chan struct{})
go func() {
wg.Wait()
close(ch)
}()
// A helper function to check if the channel is closed.
doneFn := func(timeout time.Duration) bool {
select {
case <-ch:
return true
case <-time.After(timeout):
return false
}
}
// Wait until the pruning loop is durably blocked (waiting on the ticker or context).
synctest.Wait()

// Assert the channel is not closed after 100ms.
assert.False(t, doneFn(100*time.Millisecond))
// Cancel the context and wait for the loop to exit.
cancel()

// Cancel the context and assert that the channel is closed within 100ms.
cancel()
assert.True(t, doneFn(100*time.Millisecond))
// Wait for the waitgroup to finish to ensure the routine exited.
// synctest.Test will also inherently wait for all bubbled goroutines to exit.
wg.Wait()
})
}
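
The `testing/synctest` pattern used here (stable since Go 1.25) runs the test body in a bubble with a fake clock; `synctest.Wait` parks until every goroutine in the bubble is durably blocked. A minimal sketch of the same shape, with hypothetical names:

```go
package example

import (
	"context"
	"testing"
	"testing/synctest"
)

func TestLoopExits(t *testing.T) {
	synctest.Test(t, func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		done := make(chan struct{})
		go func() { // stands in for the pruning loop
			defer close(done)
			<-ctx.Done()
		}()
		synctest.Wait() // the loop goroutine is now durably blocked
		cancel()
		<-done // fake clock: completes as soon as the goroutine exits
	})
}
```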
6 changes: 2 additions & 4 deletions core/services/llo/telem/telemetry.go
@@ -227,9 +227,7 @@ func (t *telemeter) start(_ context.Context) error {
for {
select {
case tcc := <-t.chch:
wg.Add(1)
go func() {
defer wg.Done()
wg.Go(func() {
for {
select {
case p, ok := <-tcc.in:
@@ -242,7 +240,7 @@ func (t *telemeter) start(_ context.Context) error {
return
}
}
}()
})
case p := <-t.chTelemetryPipeline:
t.prepareV3PremiumLegacyTelemetry(p)
case rt := <-t.chOutcomeTelemetry:
10 changes: 4 additions & 6 deletions core/services/llo/telem/telemetry_test.go
@@ -302,13 +302,13 @@ func Test_Telemeter_observationScopedTelemetry(t *testing.T) {
Name: "test-bridge-1",
RequestData: []byte(`foo`),
ResponseData: []byte(`bar`),
ResponseError: ptr("test error"),
ResponseError: new("test error"),
ResponseStatusCode: 200,
RequestStartTimestamp: time.Unix(1, 1),
RequestFinishTimestamp: time.Unix(2, 1),
LocalCacheHit: true,
SpecID: 3,
StreamID: ptr(uint32(135)),
StreamID: new(uint32(135)),
DotID: "ds1",
}
tm.TrackSeqNr(opts.ConfigDigest(), opts.SeqNr())
@@ -355,7 +355,7 @@ func Test_Telemeter_observationScopedTelemetry(t *testing.T) {
StreamValueType: 1,
StreamValueBinary: []byte{0x01, 0x02, 0x03},
StreamValueText: "stream value text",
ObservationError: ptr("test error"),
ObservationError: new("test error"),
ObservationTimestamp: time.Unix(1, 1).UnixNano(),
ObservationFinishedAt: time.Unix(2, 1).UnixNano(),
SeqNr: 42,
@@ -775,7 +775,7 @@ func Test_Telemeter_reportTelemetry(t *testing.T) {
tm.TrackSeqNr(opts.ConfigDigest(), opts.SeqNr())

receivedChannels := make([]uint32, 0, 3)
for i := 0; i < 3; i++ {
for range 3 {
tLog := <-m.chTypedLogs
assert.Equal(t, synchronization.LLOReport, tLog.telemType)
decoded := &datastreamsllo.LLOReportTelemetry{}
Expand All @@ -794,5 +794,3 @@ func Test_Telemeter_reportTelemetry(t *testing.T) {
}
})
}

func ptr[T any](t T) *T { return &t }
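
A note on the `ptr` to `new` change throughout this file: the diff relies on the extended `new(expr)` form (an accepted Go proposal allowing `new` to take an expression rather than only a type). Whether that form compiles depends on the toolchain; on older Go versions, the deleted generic helper is the usual spelling. A sketch of both, with the builtin form left commented as an assumption:

```go
package main

import "fmt"

// ptr is the classic generic pointer-to-value helper that this diff deletes.
func ptr[T any](t T) *T { return &t }

func main() {
	respErr := ptr("test error") // *string
	streamID := ptr(uint32(135)) // *uint32
	fmt.Println(*respErr, *streamID)

	// With the extended builtin (assumed available to this PR's toolchain):
	// respErr := new("test error")
	// streamID := new(uint32(135))
}
```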
20 changes: 14 additions & 6 deletions core/services/vrf/v2/bhs_feeder_test.go
@@ -1,6 +1,7 @@
package v2_test

import (
"math/big"
"testing"
"time"

@@ -86,11 +87,18 @@ func TestStartHeartbeats(t *testing.T) {
diff := heartbeatPeriod + 1*time.Second
t.Logf("Sleeping %.2f seconds before checking blockhash in BHS added by BHS_Heartbeats_Service\n", diff.Seconds())
time.Sleep(diff)
// storeEarliest in BHS contract stores blocktip - 256 in the Blockhash Store (BHS)
tipHeader, err := uni.backend.Client().HeaderByNumber(testutils.Context(t), nil)
require.NoError(t, err)
// the storeEarliest transaction will end up in a new block, hence the + 1 below.
blockNumberStored := tipHeader.Number.Uint64() - 256 + 1
verifyBlockhashStored(t, uni.coordinatorV2UniverseCommon, blockNumberStored)
// The heartbeat store tx may not reach the mempool before the first
// Commit under load, so we can't predict which block it mines in.
// Commit blocks and check current_tip-256 on each attempt until BHS
// has a blockhash stored at that offset.
require.Eventually(t, func() bool {
uni.backend.Commit()
tip, tipErr := uni.backend.Client().HeaderByNumber(testutils.Context(t), nil)
if tipErr != nil || tip == nil || tip.Number.Uint64() < 256 {
return false
}
_, err := uni.bhsContract.GetBlockhash(nil, new(big.Int).SetUint64(tip.Number.Uint64()-256))
return err == nil
}, testutils.WaitTimeoutCustom(t, 5*time.Minute), time.Second)
})
}