Skip to content

Commit fc16638

Browse files
committed
add container retry to clnode, postgres, jd and evm blockchains
1 parent 068b83b commit fc16638

File tree

5 files changed

+98
-4
lines changed

5 files changed

+98
-4
lines changed

framework/components/blockchain/containers.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func baseRequest(in *Input, useWS ExposeWs) testcontainers.ContainerRequest {
6464

6565
func createGenericEvmContainer(in *Input, req testcontainers.ContainerRequest, useWS bool) (*Output, error) {
6666
ctx := context.Background()
67-
c, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
67+
c, err := framework.StartContainerWithRetry(framework.L, ctx, testcontainers.GenericContainerRequest{
6868
ContainerRequest: req,
6969
Started: true,
7070
})

framework/components/clnode/clnode.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ func newNode(in *Input, pgOut *postgres.Output) (*NodeOut, error) {
324324
}
325325
req.KeepImage = false
326326
}
327-
c, err := tc.GenericContainer(ctx, tc.GenericContainerRequest{
327+
c, err := framework.StartContainerWithRetry(framework.L, ctx, tc.GenericContainerRequest{
328328
ContainerRequest: req,
329329
Started: true,
330330
})

framework/components/jd/jd.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,10 +116,12 @@ func NewJD(in *Input) (*Output, error) {
116116
}
117117
req.KeepImage = false
118118
}
119-
c, err := tc.GenericContainer(ctx, tc.GenericContainerRequest{
119+
120+
c, err := framework.StartContainerWithRetry(framework.L, ctx, tc.GenericContainerRequest{
120121
ContainerRequest: req,
121122
Started: true,
122123
})
124+
123125
if err != nil {
124126
return nil, err
125127
}

framework/components/postgres/postgres.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ func NewPostgreSQL(in *Input) (*Output, error) {
153153
}
154154
framework.ResourceLimitsFunc(h, in.ContainerResources)
155155
}
156-
c, err := testcontainers.GenericContainer(ctx, testcontainers.GenericContainerRequest{
156+
c, err := framework.StartContainerWithRetry(framework.L, ctx, testcontainers.GenericContainerRequest{
157157
ContainerRequest: req,
158158
Started: true,
159159
Reuse: true,

framework/docker.go

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/docker/docker/client"
2222
"github.com/docker/go-connections/nat"
2323
"github.com/google/uuid"
24+
"github.com/pkg/errors"
2425
"github.com/rs/zerolog"
2526
tc "github.com/testcontainers/testcontainers-go"
2627
"golang.org/x/sync/errgroup"
@@ -446,3 +447,94 @@ func NoDNS(noDNS bool, hc *container.HostConfig) {
446447
hc.DNS = []string{"127.0.0.1"}
447448
}
448449
}
450+
451+
// Retry functions copied from lib/docker/docker.go to avoid depending on that package
452+
// StartContainerRetrier is a strategy for recovering from a failed container start.
// It receives the logger, the context, the error produced by the previous start
// attempt and the request that failed, and returns either a started container or
// the error that should be reported to the caller.
type StartContainerRetrier func(l zerolog.Logger, ctx context.Context, startErr error, req tc.GenericContainerRequest) (tc.Container, error)
453+
454+
// NaiveRetrier is a simple retrier that tries to start the container again without any modifications.
455+
// It will remove the container if it exists and try to start it again.
456+
var NaiveRetrier = func(l zerolog.Logger, ctx context.Context, startErr error, req tc.GenericContainerRequest) (tc.Container, error) {
457+
l.Debug().
458+
Str("Start error", startErr.Error()).
459+
Str("Retrier", "NaiveRetrier").
460+
Msgf("Attempting to start %s container", req.Name)
461+
462+
req.Reuse = false // We need to force a new container to be created
463+
464+
removeErr := removeContainer(ctx, req)
465+
if removeErr != nil {
466+
l.Error().Err(removeErr).Msgf("Failed to remove %s container to initiate restart", req.Name)
467+
return nil, removeErr
468+
}
469+
470+
ct, err := tc.GenericContainer(ctx, req)
471+
if err == nil {
472+
l.Debug().
473+
Str("Retrier", "NaiveRetrier").
474+
Msgf("Successfully started %s container", req.Name)
475+
return ct, nil
476+
}
477+
if ct != nil {
478+
err := ct.Terminate(ctx)
479+
if err != nil {
480+
l.Error().
481+
Err(err).
482+
Msgf("Cannot terminate %s container to initiate restart", req.Name)
483+
return nil, err
484+
}
485+
}
486+
487+
l.Debug().
488+
Str("Original start error", startErr.Error()).
489+
Str("Current start error", err.Error()).
490+
Str("Retrier", "NaiveRetrier").
491+
Msgf("Failed to start %s container,", req.Name)
492+
493+
return nil, startErr
494+
}
495+
496+
// StartContainerWithRetry attempts to start a container with 3 retry attempts.
497+
// It will try to start the container with the provided retriers, if none are provided it will use the default retrier, which
498+
// simply tries to start the container again without any modifications.
499+
func StartContainerWithRetry(l zerolog.Logger, ctx context.Context, req tc.GenericContainerRequest, retriers ...StartContainerRetrier) (tc.Container, error) {
500+
var (
501+
ct tc.Container
502+
err error
503+
)
504+
505+
ct, err = tc.GenericContainer(ctx, req)
506+
if err == nil {
507+
return ct, nil
508+
}
509+
510+
if len(retriers) == 0 {
511+
retriers = append(retriers, NaiveRetrier)
512+
}
513+
514+
l.Warn().Err(err).Msgf("Cannot start %s container, retrying", req.Name)
515+
516+
req.Reuse = true // Try and see if we can reuse the container for a retry
517+
for _, retrier := range retriers {
518+
ct, err = retrier(l, ctx, err, req)
519+
if err == nil {
520+
return ct, nil
521+
}
522+
}
523+
524+
return nil, err
525+
}
526+
527+
func removeContainer(ctx context.Context, req tc.GenericContainerRequest) error {
528+
provider, providerErr := tc.NewDockerProvider()
529+
if providerErr != nil {
530+
return errors.Wrapf(providerErr, "failed to create Docker provider")
531+
}
532+
533+
removeErr := provider.Client().ContainerRemove(ctx, req.Name, container.RemoveOptions{Force: true})
534+
if removeErr != nil && strings.Contains(strings.ToLower(removeErr.Error()), "no such container") {
535+
// container doesn't exist, nothing to remove
536+
return nil
537+
}
538+
539+
return removeErr
540+
}

0 commit comments

Comments
 (0)