From ab0df7836d4ad1864a92eb3f1692bacda4b4685e Mon Sep 17 00:00:00 2001 From: Leavrth Date: Thu, 13 Apr 2023 15:45:54 +0800 Subject: [PATCH 1/6] add retry error Signed-off-by: Leavrth --- br/pkg/storage/s3.go | 7 +++++++ br/pkg/utils/backoff.go | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/br/pkg/storage/s3.go b/br/pkg/storage/s3.go index 97b9b09a69272..460266236fedf 100644 --- a/br/pkg/storage/s3.go +++ b/br/pkg/storage/s3.go @@ -956,7 +956,14 @@ func isDeadlineExceedError(err error) bool { return strings.Contains(err.Error(), "context deadline exceeded") } +func isConnectionResetError(err error) bool { + return strings.Contains(err.Error(), "read: connection reset") +} + func (rl retryerWithLog) ShouldRetry(r *request.Request) bool { + if isConnectionResetError(r.Error) { + return true + } if isDeadlineExceedError(r.Error) && r.HTTPRequest.URL.Host == ec2MetaAddress { // fast fail for unreachable linklocal address in EC2 containers. log.Warn("failed to get EC2 metadata. skipping.", logutil.ShortError(r.Error)) diff --git a/br/pkg/utils/backoff.go b/br/pkg/utils/backoff.go index 67ded2df49172..e1139d2b22586 100644 --- a/br/pkg/utils/backoff.go +++ b/br/pkg/utils/backoff.go @@ -194,11 +194,11 @@ func (bo *pdReqBackoffer) NextBackoff(err error) time.Duration { // bo.attempt-- e := errors.Cause(err) switch e { // nolint:errorlint - case nil, context.Canceled, context.DeadlineExceeded, io.EOF, sql.ErrNoRows: + case nil, context.Canceled, context.DeadlineExceeded, sql.ErrNoRows: // Excepted error, finish the operation bo.delayTime = 0 bo.attempt = 0 - case berrors.ErrRestoreTotalKVMismatch: + case berrors.ErrRestoreTotalKVMismatch, io.EOF: bo.delayTime = 2 * bo.delayTime bo.attempt-- default: From bf6af3994a5bb7ac4fa42f1275bfbd215b13dc1b Mon Sep 17 00:00:00 2001 From: Leavrth Date: Mon, 17 Apr 2023 19:03:16 +0800 Subject: [PATCH 2/6] add unit test Signed-off-by: Leavrth --- br/pkg/utils/backoff_test.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/br/pkg/utils/backoff_test.go b/br/pkg/utils/backoff_test.go index 1ddff4b7d7ca7..31778052f77e1 100644 --- a/br/pkg/utils/backoff_test.go +++ b/br/pkg/utils/backoff_test.go @@ -4,6 +4,7 @@ package utils_test import ( "context" + "io" "testing" "time" @@ -101,13 +102,16 @@ func TestPdBackoffWithRetryableError(t *testing.T) { gRPCError := status.Error(codes.Unavailable, "transport is closing") err := utils.WithRetry(context.Background(), func() error { defer func() { counter++ }() + if counter == 2 { + return io.EOF + } return gRPCError }, backoffer) require.Equal(t, 16, counter) require.Equal(t, []error{ gRPCError, gRPCError, - gRPCError, + io.EOF, gRPCError, gRPCError, gRPCError, From 093b38e2fbaa3a13e6d14df04e162c7431c4529c Mon Sep 17 00:00:00 2001 From: Leavrth Date: Mon, 24 Apr 2023 11:34:38 +0800 Subject: [PATCH 3/6] add unit test Signed-off-by: Leavrth --- br/pkg/storage/BUILD.bazel | 2 ++ br/pkg/storage/s3.go | 8 +++++++ br/pkg/storage/s3_test.go | 49 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/br/pkg/storage/BUILD.bazel b/br/pkg/storage/BUILD.bazel index a21eb4cb03e9a..e8b9522015676 100644 --- a/br/pkg/storage/BUILD.bazel +++ b/br/pkg/storage/BUILD.bazel @@ -42,6 +42,7 @@ go_library( "@com_github_google_uuid//:uuid", "@com_github_klauspost_compress//zstd", "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_pingcap_log//:log", "@com_github_spf13_pflag//:pflag", @@ -80,6 +81,7 @@ go_test( "@com_github_fsouza_fake_gcs_server//fakestorage", "@com_github_golang_mock//gomock", "@com_github_pingcap_errors//:errors", + "@com_github_pingcap_failpoint//:failpoint", "@com_github_pingcap_kvproto//pkg/brpb", "@com_github_stretchr_testify//require", ], diff --git a/br/pkg/storage/s3.go b/br/pkg/storage/s3.go index 460266236fedf..d6d29706825bc 100644 --- a/br/pkg/storage/s3.go +++ b/br/pkg/storage/s3.go @@ -27,6 +27,7 @@ import ( "github.com/aws/aws-sdk-go/service/s3/s3iface" "github.com/aws/aws-sdk-go/service/s3/s3manager" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/log" berrors "github.com/pingcap/tidb/br/pkg/errors" @@ -961,6 +962,13 @@ func isConnectionResetError(err error) bool { } func (rl retryerWithLog) ShouldRetry(r *request.Request) bool { + // for unit test + failpoint.Inject("replace-error-to-connection-reset-by-peer", func(_ failpoint.Value) { + log.Info("original error", zap.Error(r.Error)) + if r.Error != nil { + r.Error = errors.New("read tcp *.*.*.*:*->*.*.*.*:*: read: connection reset by peer") + } + }) if isConnectionResetError(r.Error) { return true } diff --git a/br/pkg/storage/s3_test.go b/br/pkg/storage/s3_test.go index 0541e66b8617f..265733f61dfdb 100644 --- a/br/pkg/storage/s3_test.go +++ b/br/pkg/storage/s3_test.go @@ -12,6 +12,7 @@ import ( "net/http" "net/http/httptest" "os" + "sync" "testing" "github.com/aws/aws-sdk-go/aws" @@ -20,6 +21,7 @@ import ( "github.com/aws/aws-sdk-go/service/s3" "github.com/golang/mock/gomock" "github.com/pingcap/errors" + "github.com/pingcap/failpoint" backuppb "github.com/pingcap/kvproto/pkg/brpb" "github.com/pingcap/tidb/br/pkg/mock" . "github.com/pingcap/tidb/br/pkg/storage" @@ -1292,3 +1294,50 @@ func TestS3StorageBucketRegion(t *testing.T) { }(ca.name, ca.expectRegion, ca.s3) } } + +func TestRetryError(t *testing.T) { + var count int32 = 0 + var errString string = "read tcp *.*.*.*:*->*.*.*.*:*: read: connection reset by peer" + var lock sync.Mutex + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == "PUT" { + var curCnt int32 + t.Log(r.URL) + lock.Lock() + count += 1 + curCnt = count + lock.Unlock() + if curCnt < 2 { + // write an cannot-retry error, but we modify the error to specific error, so client would retry. + w.WriteHeader(403) + return + } + } + + w.WriteHeader(200) + + })) + + defer server.Close() + t.Log(server.URL) + + require.NoError(t, failpoint.Enable("github.com/pingcap/tidb/br/pkg/storage/replace-error-to-connection-reset-by-peer", "return(true)")) + defer func() { + failpoint.Disable("github.com/pingcap/tidb/br/pkg/storage/replace-error-to-connection-reset-by-peer") + }() + + ctx := context.Background() + s, err := NewS3Storage(ctx, &backuppb.S3{ + Endpoint: server.URL, + Bucket: "test", + Prefix: "retry", + AccessKey: "none", + SecretAccessKey: "none", + Provider: "skip check region", + ForcePathStyle: true, + }, &ExternalStorageOptions{}) + require.NoError(t, err) + err = s.WriteFile(ctx, "reset", []byte(errString)) + require.NoError(t, err) + require.Equal(t, count, int32(2)) +} From 4642ccaf7f98c87842dbd17d21d8a429d081d184 Mon Sep 17 00:00:00 2001 From: Leavrth Date: Mon, 8 May 2023 13:49:55 +0800 Subject: [PATCH 4/6] remove empty line Signed-off-by: Leavrth --- .bazelversion | 2 +- br/pkg/storage/s3_test.go | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.bazelversion b/.bazelversion index 09b254e90c61e..f3b5af39e430e 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -6.0.0 +6.1.1 diff --git a/br/pkg/storage/s3_test.go b/br/pkg/storage/s3_test.go index 265733f61dfdb..a1f185824417b 100644 --- a/br/pkg/storage/s3_test.go +++ b/br/pkg/storage/s3_test.go @@ -1315,7 +1315,6 @@ func TestRetryError(t *testing.T) { } w.WriteHeader(200) - })) defer server.Close() From 9436bd1276770273920e71e6ec622d040be92a48 Mon Sep 17 00:00:00 2001 From: Leavrth Date: Mon, 8 May 2023 14:03:27 +0800 Subject: [PATCH 5/6] change back the bazel version Signed-off-by: Leavrth --- .bazelversion | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.bazelversion b/.bazelversion index f3b5af39e430e..09b254e90c61e 100644 --- a/.bazelversion +++ b/.bazelversion @@ -1 +1 @@ -6.1.1 +6.0.0 From 19f8d3e0ea66378fc0b4604235838f8f4244dd01 Mon Sep 17 00:00:00 2001 From: Leavrth Date: Mon, 8 May 2023 14:46:20 +0800 Subject: [PATCH 6/6] make bazel_prepare Signed-off-by: Leavrth --- br/pkg/storage/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/pkg/storage/BUILD.bazel b/br/pkg/storage/BUILD.bazel index 87ccd4efdfb2c..810585525db8d 100644 --- a/br/pkg/storage/BUILD.bazel +++ b/br/pkg/storage/BUILD.bazel @@ -71,7 +71,7 @@ go_test( ], embed = [":storage"], flaky = True, - shard_count = 42, + shard_count = 43, deps = [ "//br/pkg/mock", "@com_github_aws_aws_sdk_go//aws",