Skip to content

Commit

Permalink
lightning: retry for WaitN error caused by store-write-bwlimit (#51425)…
Browse files Browse the repository at this point in the history
… (#52581)

close #51383
  • Loading branch information
ti-chi-bot committed Apr 15, 2024
1 parent 425cda9 commit a1b73d0
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 0 deletions.
1 change: 1 addition & 0 deletions br/pkg/lightning/common/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ go_test(
"@com_github_stretchr_testify//require",
"@org_golang_google_grpc//codes",
"@org_golang_google_grpc//status",
"@org_golang_x_time//rate",
"@org_uber_go_goleak//:goleak",
"@org_uber_go_multierr//:multierr",
],
Expand Down
2 changes: 2 additions & 0 deletions br/pkg/lightning/common/retry.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ var retryableErrorMsgList = []string{
// this error happens when distsql.Checksum calls TiKV
// see https://github.com/pingcap/tidb/blob/2c3d4f1ae418881a95686e8b93d4237f2e76eec6/store/copr/coprocessor.go#L941
"coprocessor task terminated due to exceeding the deadline",
// rate limiter WaitN error caused by store-write-bwlimit, see https://github.com/pingcap/tidb/issues/51383
"rate: wait",
}

func isRetryableFromErrorMessage(err error) bool {
Expand Down
11 changes: 11 additions & 0 deletions br/pkg/lightning/common/retry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,15 @@ import (
"net"
"net/url"
"testing"
"time"

"github.com/go-sql-driver/mysql"
"github.com/pingcap/errors"
tmysql "github.com/pingcap/tidb/errno"
drivererr "github.com/pingcap/tidb/store/driver/error"
"github.com/stretchr/testify/require"
"go.uber.org/multierr"
"golang.org/x/time/rate"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
Expand Down Expand Up @@ -117,4 +119,13 @@ func TestIsRetryableError(t *testing.T) {
require.False(t, IsRetryableError(multierr.Combine(context.Canceled, &net.DNSError{IsTimeout: true})))

require.True(t, IsRetryableError(errors.New("other error: Coprocessor task terminated due to exceeding the deadline")))

// error from limiter
l := rate.NewLimiter(rate.Limit(1), 1)
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
defer cancel()
// WaitN fails without blocking: 10 tokens exceed the burst of 1 (and could not be granted within the 1 second timeout anyway)
err = l.WaitN(ctx, 10)
require.Error(t, err)
require.True(t, IsRetryableError(err))
}

0 comments on commit a1b73d0

Please sign in to comment.