Changes from all commits (32 commits)
3 changes: 2 additions & 1 deletion .gitignore
@@ -34,7 +34,8 @@ operator_ui/install
.devenv
event_dump.ndjson
.cursor/
.claude/
.claude/*
!/tools/test/.claude/skills/

# neovim
.nvim.lua
11 changes: 10 additions & 1 deletion README.md
@@ -239,7 +239,7 @@ go test ./...
#### Notes

- The `parallel` flag can be used to limit CPU usage when running tests in the background (`-parallel=4`) - the default is `GOMAXPROCS`
- The `p` flag can be used to limit the number of _packages_ tested concurrently, if they are interferring with one another (`-p=1`)
- The `p` flag can be used to limit the number of _packages_ tested concurrently, if they are interfering with one another (`-p=1`)
- The `-short` flag skips tests which depend on the database, for quickly spot-checking simpler tests in around one minute
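
These are standard `go test` flags and can be combined, e.g.:

```sh
# Serialize packages, cap per-package parallelism, and skip DB-backed tests
go test -short -p=1 -parallel=4 ./...
```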

#### Race Detector
@@ -271,6 +271,15 @@ go test ./pkg/path -run=XXX -fuzz=FuzzTestName

https://go.dev/doc/fuzz/

#### New Flow

There's a new, simpler flow for running tests at [tools/test](./tools/test/README.md). It also includes a `diagnose` command, and an [AI skill](./tools/test/.agents/skills/chainlink-test-diagnosis/SKILL.md) for diagnosing and fixing unstable or slow tests.

```sh
# From the /chainlink root
go -C tools/test run . -h # Command help
```
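
The text above also names a `diagnose` command; assuming it is a subcommand of the same tool with conventional flag handling, its help should be reachable the same way:

```sh
# From the /chainlink root; subcommand name per the text above, flags assumed
go -C tools/test run . diagnose -h
```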

### Go Modules

This repository contains three Go modules:
30 changes: 26 additions & 4 deletions core/internal/testutils/testutils.go
@@ -86,17 +86,39 @@ func RandomizeName(n string) string {
// DefaultWaitTimeout is the default wait timeout. If you have a *testing.T, use WaitTimeout instead.
const DefaultWaitTimeout = 30 * time.Second

// WaitTimeout returns a timeout based on the test's Deadline, if available.
// deadlineRemainingBudget returns ~90% of time until the test deadline, or false if none.
func deadlineRemainingBudget(t *testing.T) (time.Duration, bool) {
if d, ok := t.Deadline(); ok {
return time.Until(d) * 9 / 10, true // 10% buffer for cleanup
}
return 0, false
}

// WaitTimeout returns a timeout capped by the test's Deadline, if available.
// Especially important to use in parallel tests, as their individual execution
// can get paused for arbitrary amounts of time.
//
// When a deadline exists, it uses the full remaining budget (90% of time until the
// deadline), not [DefaultWaitTimeout], so long-running tests still get enough wall
// clock under package timeouts.
func WaitTimeout(t *testing.T) time.Duration {
if d, ok := t.Deadline(); ok {
// 10% buffer for cleanup and scheduling delay
return time.Until(d) * 9 / 10
if budget, ok := deadlineRemainingBudget(t); ok {
return budget
}
return DefaultWaitTimeout
}

// WaitTimeoutCustom uses the requested duration when there is no test deadline.
// When the test has a deadline, it returns the lesser of the requested duration and
// the remaining budget (90% of time until deadline), so callers can ask for e.g. 5m
// without exceeding the test process deadline.
func WaitTimeoutCustom(t *testing.T, requested time.Duration) time.Duration {
if budget, ok := deadlineRemainingBudget(t); ok {
return min(budget, requested)
}
return requested
}

Review thread on WaitTimeoutCustom:

jmank88 (Contributor), May 5, 2026:
Doesn't this change the logic of WaitTimeout? It used to always prefer 90% no matter how that compared to the default. Now it would return the default instead if that were shorter, but that will artificially limit tests that we want to use 90%.

Author (Collaborator):
Ya. My thought was "let's force people to use cleaner timeouts" but I'm realizing that's pretty heavy-handed and unrealistic in this case.

jmank88 (Contributor):
I think it would re-introduce a lot of chaos. Switching from static, artificially short timeouts over to these dynamic extended ones was a big win originally. I think we've only increased concurrency and non-determinism since then.

jmank88 (Contributor):
I see that logic is still in this version. Why is it better to quit early, when the suite is configured to run longer? Scaling to that limit is a feature - e.g. what if I have an older machine? Am I expected to edit the code to get tests to pass?
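
To make the helpers' semantics concrete, here is a minimal usage sketch; the test and the `checkReady`/`checkSynced` poll conditions are hypothetical:

```go
package testutils

import (
	"testing"
	"time"

	"github.com/stretchr/testify/require"
)

// checkReady and checkSynced are hypothetical poll conditions.
func checkReady() bool  { return true }
func checkSynced() bool { return true }

func TestSlowSubsystem(t *testing.T) {
	t.Parallel()
	// WaitTimeout scales with `go test -timeout`: ~90% of the remaining
	// deadline, or DefaultWaitTimeout (30s) when no deadline is set.
	require.Eventually(t, checkReady, WaitTimeout(t), time.Second)
	// WaitTimeoutCustom requests 5m but is capped by the remaining budget,
	// so it cannot outlive the test process deadline.
	require.Eventually(t, checkSynced, WaitTimeoutCustom(t, 5*time.Minute), time.Second)
}
```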

// Context returns a context with the test's deadline, if available.
// Deprecated: use [testing.TB.Context] directly
func Context(tb testing.TB) context.Context {
38 changes: 38 additions & 0 deletions core/internal/testutils/testutils_test.go
@@ -0,0 +1,38 @@
package testutils

import (
"testing"
"time"

"github.com/stretchr/testify/require"
)

func TestWaitTimeoutUsesDeadlineBudgetNotDefaultCap(t *testing.T) {
if d, ok := t.Deadline(); ok {
expectBefore := time.Until(d) * 9 / 10
got := WaitTimeout(t)
expectAfter := time.Until(d) * 9 / 10
require.Greater(t, got, time.Duration(0))
// WaitTimeout evaluates time.Until(deadline) between these two snapshots, so got must fall between them
require.GreaterOrEqual(t, got, expectAfter)
require.LessOrEqual(t, got, expectBefore)
} else {
require.Equal(t, DefaultWaitTimeout, WaitTimeout(t))
}
}

func TestWaitTimeoutCustom(t *testing.T) {
requested := 10 * time.Second

if d, ok := t.Deadline(); ok {
expectBefore := time.Until(d) * 9 / 10
got := WaitTimeoutCustom(t, requested)
expectAfter := time.Until(d) * 9 / 10
require.Greater(t, got, time.Duration(0))
require.LessOrEqual(t, got, requested)
require.GreaterOrEqual(t, got, min(expectAfter, requested))
require.LessOrEqual(t, got, min(expectBefore, requested))
} else {
require.Equal(t, requested, WaitTimeoutCustom(t, requested))
}
}
10 changes: 9 additions & 1 deletion core/logger/internal/colortest/prettyconsole_test.go
@@ -1,18 +1,26 @@
package colortest

import (
"strings"
"testing"

"github.com/fatih/color"
"github.com/stretchr/testify/assert"

"github.com/smartcontractkit/chainlink/v2/core/logger"
)

func init() {
func requireANSIColor(t *testing.T) {
t.Helper()
logger.InitColor(true)
if !strings.Contains(color.New(color.FgRed).Sprint("x"), "\x1b[31m") {
t.Skip("ANSI color output is disabled in this terminal")
}
}

func TestPrettyConsole_Write(t *testing.T) {
requireANSIColor(t)

tests := []struct {
name string
input string
6 changes: 2 additions & 4 deletions core/services/llo/telem/sampling.go
@@ -87,9 +87,7 @@ func (s *sampler) StartPruningLoop(ctx context.Context, wg *sync.WaitGroup) {
return
}

wg.Add(1)
go func() {
defer wg.Done()
wg.Go(func() {
t := time.NewTicker(s.prunePeriod)
defer t.Stop()

@@ -101,7 +99,7 @@ func (s *sampler) StartPruningLoop(ctx context.Context, wg *sync.WaitGroup) {
return
}
}
}()
})
}

// pruneStorage removes all records which are older than a predefined period (s.prunePeriod).
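For context on the change above: `(*sync.WaitGroup).Go`, added in Go 1.25, wraps the manual Add/Done/go pattern on the deleted lines. A minimal sketch:

```go
package main

import "sync"

func main() {
	var wg sync.WaitGroup
	// wg.Go(f) is equivalent to:
	//   wg.Add(1)
	//   go func() { defer wg.Done(); f() }()
	wg.Go(func() {
		// background work goes here
	})
	wg.Wait() // blocks until every wg.Go callback returns
}
```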
52 changes: 21 additions & 31 deletions core/services/llo/telem/sampling_test.go
@@ -6,6 +6,7 @@ import (
"fmt"
"sync"
"testing"
"testing/synctest"
"time"

"github.com/stretchr/testify/assert"
@@ -115,12 +116,13 @@ func TestSample(t *testing.T) {
t.Parallel()

lggr := logger.TestSugared(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
ctx := t.Context()
samplr := newSampler(lggr, true)
samplr.StartPruningLoop(ctx, &sync.WaitGroup{})

t0 := time.Now()
// Truncate to second boundary so both messages map to the same second regardless of wall-clock timing.
// Without truncation, t0 near a second boundary causes t0+50ms to fall in the next second, defeating deduplication.
t0 := time.Now().Truncate(time.Second)
msg0 := &llo.LLOOutcomeTelemetry{
DonId: 2,
ConfigDigest: []byte("digest"),
@@ -145,8 +147,7 @@ func TestPruningLoop(t *testing.T) {
t.Parallel()

lggr := logger.TestSugared(t)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
ctx := t.Context()

samplr := newSampler(lggr, true)
// We need a prune time of at least one second in order to detect outdated entries.
@@ -194,34 +195,23 @@ func TestPruningLoop_Exits(t *testing.T) {
func TestPruningLoop_Exits(t *testing.T) {
t.Parallel()

lggr := logger.TestSugared(t)
ctx, cancel := context.WithCancel(context.Background())
wg := &sync.WaitGroup{}
synctest.Test(t, func(t *testing.T) {
lggr := logger.TestSugared(t)
ctx, cancel := context.WithCancel(context.Background())
wg := new(sync.WaitGroup)

// Start the sampler and its loop. This increments the waitgroup's counter.
samplr := newSampler(lggr, true)
samplr.StartPruningLoop(ctx, wg)
// Start the sampler and its loop. This increments the waitgroup's counter.
samplr := newSampler(lggr, true)
samplr.StartPruningLoop(ctx, wg)

// Create a channel which will be closed when the waitgroup is done, i.e. when the loop is closed.
ch := make(chan struct{})
go func() {
wg.Wait()
close(ch)
}()
// A helper function to check if the channel is closed.
doneFn := func(timeout time.Duration) bool {
select {
case <-ch:
return true
case <-time.After(timeout):
return false
}
}
// Wait until the pruning loop is durably blocked (waiting on the ticker or context).
synctest.Wait()

// Assert the channel is not closed after 100ms.
assert.False(t, doneFn(100*time.Millisecond))
// Cancel the context and wait for the loop to exit.
cancel()

// Cancel the context and assert that the channel is closed within 100ms.
cancel()
assert.True(t, doneFn(100*time.Millisecond))
// Wait for the waitgroup to finish to ensure the routine exited.
// synctest.Test will also inherently wait for all bubbled goroutines to exit.
wg.Wait()
})
}
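
The `testing/synctest` pattern used here (stable since Go 1.25) runs the test body in a bubble with a fake clock; `synctest.Wait` parks until every goroutine in the bubble is durably blocked. A minimal sketch of the same shape, with hypothetical names:

```go
package example

import (
	"context"
	"testing"
	"testing/synctest"
)

func TestLoopExits(t *testing.T) {
	synctest.Test(t, func(t *testing.T) {
		ctx, cancel := context.WithCancel(context.Background())
		done := make(chan struct{})
		go func() { // stands in for the pruning loop
			defer close(done)
			<-ctx.Done()
		}()
		synctest.Wait() // the loop goroutine is now durably blocked
		cancel()
		<-done // fake clock: completes as soon as the goroutine exits
	})
}
```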
6 changes: 2 additions & 4 deletions core/services/llo/telem/telemetry.go
@@ -227,9 +227,7 @@ func (t *telemeter) start(_ context.Context) error {
for {
select {
case tcc := <-t.chch:
wg.Add(1)
go func() {
defer wg.Done()
wg.Go(func() {
for {
select {
case p, ok := <-tcc.in:
@@ -242,7 +240,7 @@ func (t *telemeter) start(_ context.Context) error {
return
}
}
}()
})
case p := <-t.chTelemetryPipeline:
t.prepareV3PremiumLegacyTelemetry(p)
case rt := <-t.chOutcomeTelemetry:
10 changes: 4 additions & 6 deletions core/services/llo/telem/telemetry_test.go
@@ -302,13 +302,13 @@ func Test_Telemeter_observationScopedTelemetry(t *testing.T) {
Name: "test-bridge-1",
RequestData: []byte(`foo`),
ResponseData: []byte(`bar`),
ResponseError: ptr("test error"),
ResponseError: new("test error"),
ResponseStatusCode: 200,
RequestStartTimestamp: time.Unix(1, 1),
RequestFinishTimestamp: time.Unix(2, 1),
LocalCacheHit: true,
SpecID: 3,
StreamID: ptr(uint32(135)),
StreamID: new(uint32(135)),
DotID: "ds1",
}
tm.TrackSeqNr(opts.ConfigDigest(), opts.SeqNr())
@@ -355,7 +355,7 @@ func Test_Telemeter_observationScopedTelemetry(t *testing.T) {
StreamValueType: 1,
StreamValueBinary: []byte{0x01, 0x02, 0x03},
StreamValueText: "stream value text",
ObservationError: ptr("test error"),
ObservationError: new("test error"),
ObservationTimestamp: time.Unix(1, 1).UnixNano(),
ObservationFinishedAt: time.Unix(2, 1).UnixNano(),
SeqNr: 42,
@@ -775,7 +775,7 @@ func Test_Telemeter_reportTelemetry(t *testing.T) {
tm.TrackSeqNr(opts.ConfigDigest(), opts.SeqNr())

receivedChannels := make([]uint32, 0, 3)
for i := 0; i < 3; i++ {
for range 3 {
tLog := <-m.chTypedLogs
assert.Equal(t, synchronization.LLOReport, tLog.telemType)
decoded := &datastreamsllo.LLOReportTelemetry{}
Expand All @@ -794,5 +794,3 @@ func Test_Telemeter_reportTelemetry(t *testing.T) {
}
})
}

func ptr[T any](t T) *T { return &t }
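
A note on the `ptr` to `new` change throughout this file: the diff relies on the extended `new(expr)` form (an accepted Go proposal allowing `new` to take an expression rather than only a type). Whether that form compiles depends on the toolchain; on older Go versions, the deleted generic helper is the usual spelling. A sketch of both, with the builtin form left commented as an assumption:

```go
package main

import "fmt"

// ptr is the classic generic pointer-to-value helper that this diff deletes.
func ptr[T any](t T) *T { return &t }

func main() {
	respErr := ptr("test error") // *string
	streamID := ptr(uint32(135)) // *uint32
	fmt.Println(*respErr, *streamID)

	// With the extended builtin (assumed available to this PR's toolchain):
	// respErr := new("test error")
	// streamID := new(uint32(135))
}
```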
20 changes: 14 additions & 6 deletions core/services/vrf/v2/bhs_feeder_test.go
@@ -1,6 +1,7 @@
package v2_test

import (
"math/big"
"testing"
"time"

@@ -86,11 +87,18 @@ func TestStartHeartbeats(t *testing.T) {
diff := heartbeatPeriod + 1*time.Second
t.Logf("Sleeping %.2f seconds before checking blockhash in BHS added by BHS_Heartbeats_Service\n", diff.Seconds())
time.Sleep(diff)
// storeEarliest in BHS contract stores blocktip - 256 in the Blockhash Store (BHS)
tipHeader, err := uni.backend.Client().HeaderByNumber(testutils.Context(t), nil)
require.NoError(t, err)
// the storeEarliest transaction will end up in a new block, hence the + 1 below.
blockNumberStored := tipHeader.Number.Uint64() - 256 + 1
verifyBlockhashStored(t, uni.coordinatorV2UniverseCommon, blockNumberStored)
// The heartbeat store tx may not reach the mempool before the first
// Commit under load, so we can't predict which block it mines in.
// Commit blocks and check current_tip-256 on each attempt until BHS
// has a blockhash stored at that offset.
require.Eventually(t, func() bool {
uni.backend.Commit()
tip, tipErr := uni.backend.Client().HeaderByNumber(testutils.Context(t), nil)
if tipErr != nil || tip == nil || tip.Number.Uint64() < 256 {
return false
}
_, err := uni.bhsContract.GetBlockhash(nil, new(big.Int).SetUint64(tip.Number.Uint64()-256))
return err == nil
}, testutils.WaitTimeoutCustom(t, 5*time.Minute), time.Second)
})
}