Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #88017 from feiskyer/fix-409
Make Azure clients only retry on specified HTTP status codes
  • Loading branch information
k8s-ci-robot committed Feb 12, 2020
2 parents de9bbcc + 6a48772 commit 50c8f73
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 47 deletions.
16 changes: 5 additions & 11 deletions staging/src/k8s.io/legacy-cloud-providers/azure/azure.go
Expand Up @@ -504,23 +504,17 @@ func (az *Cloud) InitializeCloudFromConfig(config *Config, fromSecret bool) erro
az.VirtualMachineSizesClient = vmsizeclient.New(azClientConfig.WithRateLimiter(config.VirtualMachineSizeRateLimit))
az.SnapshotsClient = snapshotclient.New(azClientConfig.WithRateLimiter(config.SnapshotRateLimit))
az.StorageAccountClient = storageaccountclient.New(azClientConfig.WithRateLimiter(config.StorageAccountRateLimit))

az.VirtualMachinesClient = vmclient.New(azClientConfig.WithRateLimiter(config.VirtualMachineRateLimit))
az.DisksClient = diskclient.New(azClientConfig.WithRateLimiter(config.DiskRateLimit))
// fileClient is not based on armclient, but it's still backoff retried.
az.FileClient = newAzureFileClient(env, azClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusNotFound}))

vmClientConfig := azClientConfig.WithRateLimiter(config.VirtualMachineRateLimit)
vmClientConfig.Backoff = vmClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusConflict})
az.VirtualMachinesClient = vmclient.New(vmClientConfig)
az.FileClient = newAzureFileClient(env, azClientConfig.Backoff)

// Error "not an active Virtual Machine Scale Set VM" is not retriable for VMSS VM.
// But http.StatusNotFound is retriable because of ARM replication latency.
vmssVMClientConfig := azClientConfig.WithRateLimiter(config.VirtualMachineScaleSetRateLimit)
vmssVMClientConfig.Backoff = vmssVMClientConfig.Backoff.WithNonRetriableErrors([]string{vmssVMNotActiveErrorMessage}, []int{http.StatusConflict})
vmssVMClientConfig.Backoff = vmssVMClientConfig.Backoff.WithNonRetriableErrors([]string{vmssVMNotActiveErrorMessage}).WithRetriableHTTPStatusCodes([]int{http.StatusNotFound})
az.VirtualMachineScaleSetVMsClient = vmssvmclient.New(vmssVMClientConfig)

disksClientConfig := azClientConfig.WithRateLimiter(config.DiskRateLimit)
disksClientConfig.Backoff = disksClientConfig.Backoff.WithNonRetriableErrors([]string{}, []int{http.StatusNotFound, http.StatusConflict})
az.DisksClient = diskclient.New(disksClientConfig)

if az.MaximumLoadBalancerRuleCount == 0 {
az.MaximumLoadBalancerRuleCount = maximumLoadBalancerRuleCount
}
Expand Down
Expand Up @@ -801,7 +801,7 @@ func TestReconcileSecurityGroupEtagMismatch(t *testing.T) {
HTTPStatusCode: http.StatusPreconditionFailed,
RawError: errPreconditionFailedEtagMismatch,
}
assert.Equal(t, err, expectedError.Error())
assert.Equal(t, expectedError.Error(), err)
}

func TestReconcilePublicIPWithNewService(t *testing.T) {
Expand Down
Expand Up @@ -154,7 +154,7 @@ func TestSendAsync(t *testing.T) {
assert.Nil(t, response)
assert.Equal(t, 1, count)
assert.NotNil(t, rerr)
assert.Equal(t, true, rerr.Retriable)
assert.Equal(t, false, rerr.Retriable)
}

func TestNormalizeAzureRegion(t *testing.T) {
Expand Down Expand Up @@ -236,7 +236,7 @@ func TestDeleteResourceAsync(t *testing.T) {
count := 0
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
count++
http.Error(w, "failed", http.StatusForbidden)
http.Error(w, "failed", http.StatusInternalServerError)
}))

backoff := &retry.Backoff{Steps: 3}
Expand Down
Expand Up @@ -38,6 +38,15 @@ const (
var (
// The function to get current time.
now = time.Now

// StatusCodesForRetry are a defined group of status code for which the client will retry.
StatusCodesForRetry = []int{
http.StatusRequestTimeout, // 408
http.StatusInternalServerError, // 500
http.StatusBadGateway, // 502
http.StatusServiceUnavailable, // 503
http.StatusGatewayTimeout, // 504
}
)

// Error indicates an error returned by Azure APIs.
Expand Down Expand Up @@ -184,18 +193,21 @@ func getHTTPStatusCode(resp *http.Response) int {
// shouldRetryHTTPRequest determines if the request is retriable.
func shouldRetryHTTPRequest(resp *http.Response, err error) bool {
if resp != nil {
// HTTP 412 (StatusPreconditionFailed) means etag mismatch
// HTTP 400 (BadRequest) means the request cannot be accepted, hence we shouldn't retry.
if resp.StatusCode == http.StatusPreconditionFailed || resp.StatusCode == http.StatusBadRequest {
return false
for _, code := range StatusCodesForRetry {
if resp.StatusCode == code {
return true
}
}

// HTTP 4xx (except 412) or 5xx suggests we should retry.
if 399 < resp.StatusCode && resp.StatusCode < 600 {
// should retry on <200, error>.
if isSuccessHTTPResponse(resp) && err != nil {
return true
}

return false
}

// should retry when error is not nil and no http.Response.
if err != nil {
return true
}
Expand Down Expand Up @@ -224,6 +236,24 @@ func getRetryAfter(resp *http.Response) time.Duration {
return dur
}

// GetErrorWithRetriableHTTPStatusCodes gets an error with RetriableHTTPStatusCodes.
// It is used to retry on some HTTPStatusCodes.
func GetErrorWithRetriableHTTPStatusCodes(resp *http.Response, err error, retriableHTTPStatusCodes []int) *Error {
rerr := GetError(resp, err)
if rerr == nil {
return nil
}

for _, code := range retriableHTTPStatusCodes {
if rerr.HTTPStatusCode == code {
rerr.Retriable = true
break
}
}

return rerr
}

// GetStatusNotFoundAndForbiddenIgnoredError gets an error with StatusNotFound and StatusForbidden ignored.
// It is only used in DELETE operations.
func GetStatusNotFoundAndForbiddenIgnoredError(resp *http.Response, err error) *Error {
Expand Down
Expand Up @@ -73,7 +73,7 @@ func TestGetError(t *testing.T) {
code: http.StatusSeeOther,
err: fmt.Errorf("some error"),
expected: &Error{
Retriable: true,
Retriable: false,
HTTPStatusCode: http.StatusSeeOther,
RawError: fmt.Errorf("some error"),
},
Expand All @@ -82,7 +82,7 @@ func TestGetError(t *testing.T) {
code: http.StatusTooManyRequests,
retryAfter: 100,
expected: &Error{
Retriable: true,
Retriable: false,
HTTPStatusCode: http.StatusTooManyRequests,
RetryAfter: now().Add(100 * time.Second),
RawError: fmt.Errorf("some error"),
Expand Down Expand Up @@ -156,7 +156,7 @@ func TestGetStatusNotFoundAndForbiddenIgnoredError(t *testing.T) {
code: http.StatusSeeOther,
err: fmt.Errorf("some error"),
expected: &Error{
Retriable: true,
Retriable: false,
HTTPStatusCode: http.StatusSeeOther,
RawError: fmt.Errorf("some error"),
},
Expand All @@ -165,7 +165,7 @@ func TestGetStatusNotFoundAndForbiddenIgnoredError(t *testing.T) {
code: http.StatusTooManyRequests,
retryAfter: 100,
expected: &Error{
Retriable: true,
Retriable: false,
HTTPStatusCode: http.StatusTooManyRequests,
RetryAfter: now().Add(100 * time.Second),
RawError: fmt.Errorf("some error"),
Expand Down
Expand Up @@ -58,8 +58,8 @@ type Backoff struct {
Cap time.Duration
// The errors indicate that the request shouldn't do more retrying.
NonRetriableErrors []string
// The HTTPStatusCode indicates that the request shouldn't do more retrying.
NonRetriableHTTPStatusCodes []int
// The RetriableHTTPStatusCodes indicates that the HTTPStatusCode should do more retrying.
RetriableHTTPStatusCodes []int
}

// NewBackoff creates a new Backoff.
Expand All @@ -73,11 +73,17 @@ func NewBackoff(duration time.Duration, factor float64, jitter float64, steps in
}
}

// WithNonRetriableErrors returns a new *Backoff with NonRetriableErrors, NonRetriableHTTPStatusCodes assigned.
func (b *Backoff) WithNonRetriableErrors(errs []string, httpStatusCodes []int) *Backoff {
// WithNonRetriableErrors returns a new *Backoff with NonRetriableErrors assigned.
func (b *Backoff) WithNonRetriableErrors(errs []string) *Backoff {
newBackoff := *b
newBackoff.NonRetriableErrors = errs
newBackoff.NonRetriableHTTPStatusCodes = httpStatusCodes
return &newBackoff
}

// WithRetriableHTTPStatusCodes returns a new *Backoff with RetriableHTTPStatusCode assigned.
func (b *Backoff) WithRetriableHTTPStatusCodes(httpStatusCodes []int) *Backoff {
newBackoff := *b
newBackoff.RetriableHTTPStatusCodes = httpStatusCodes
return &newBackoff
}

Expand All @@ -93,11 +99,6 @@ func (b *Backoff) isNonRetriableError(rerr *Error) bool {
}
}

for _, code := range b.NonRetriableHTTPStatusCodes {
if rerr.HTTPStatusCode == code {
return true
}
}
return false
}

Expand Down Expand Up @@ -162,7 +163,7 @@ func doBackoffRetry(s autorest.Sender, r *http.Request, backoff *Backoff) (resp
return
}
resp, err = s.Do(rr.Request())
rerr := GetError(resp, err)
rerr := GetErrorWithRetriableHTTPStatusCodes(resp, err, backoff.RetriableHTTPStatusCodes)
// Abort retries in the following scenarios:
// 1) request succeed
// 2) request is not retriable
Expand Down
Expand Up @@ -75,11 +75,6 @@ func TestStep(t *testing.T) {
}

func TestDoBackoffRetry(t *testing.T) {
backoff := &Backoff{
Factor: 1.0,
Steps: 3,
NonRetriableHTTPStatusCodes: []int{http.StatusNotFound},
}
fakeRequest := &http.Request{
URL: &url.URL{
Host: "localhost",
Expand All @@ -96,7 +91,7 @@ func TestDoBackoffRetry(t *testing.T) {
HTTPStatusCode: 500,
RawError: fmt.Errorf("HTTP status code (500)"),
}
resp, err := doBackoffRetry(client, fakeRequest, backoff)
resp, err := doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
assert.NotNil(t, resp)
assert.Equal(t, 500, resp.StatusCode)
assert.Equal(t, expectedErr.Error(), err)
Expand All @@ -106,7 +101,7 @@ func TestDoBackoffRetry(t *testing.T) {
r = mocks.NewResponseWithStatus("200 OK", http.StatusOK)
client = mocks.NewSender()
client.AppendAndRepeatResponse(r, 1)
resp, err = doBackoffRetry(client, fakeRequest, backoff)
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
assert.Nil(t, err)
assert.Equal(t, 1, client.Attempts())
assert.NotNil(t, resp)
Expand All @@ -117,28 +112,47 @@ func TestDoBackoffRetry(t *testing.T) {
client = mocks.NewSender()
client.AppendAndRepeatResponse(r, 1)
expectedErr = &Error{
Retriable: true,
Retriable: false,
HTTPStatusCode: 429,
RawError: fmt.Errorf("HTTP status code (429)"),
}
resp, err = doBackoffRetry(client, fakeRequest, backoff)
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
assert.Equal(t, expectedErr.Error(), err)
assert.Equal(t, 1, client.Attempts())
assert.NotNil(t, resp)
assert.Equal(t, 429, resp.StatusCode)

// retry on non retriable error
// don't retry on non retriable error
r = mocks.NewResponseWithStatus("404 StatusNotFound", http.StatusNotFound)
client = mocks.NewSender()
client.AppendAndRepeatResponse(r, 1)
expectedErr = &Error{
Retriable: true,
Retriable: false,
HTTPStatusCode: 404,
RawError: fmt.Errorf("HTTP status code (404)"),
}
resp, err = doBackoffRetry(client, fakeRequest, backoff)
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{Factor: 1.0, Steps: 3})
assert.NotNil(t, resp)
assert.Equal(t, 404, resp.StatusCode)
assert.Equal(t, expectedErr.Error(), err)
assert.Equal(t, 1, client.Attempts())

// retry on RetriableHTTPStatusCodes
r = mocks.NewResponseWithStatus("102 StatusProcessing", http.StatusProcessing)
client = mocks.NewSender()
client.AppendAndRepeatResponse(r, 3)
expectedErr = &Error{
Retriable: true,
HTTPStatusCode: 102,
RawError: fmt.Errorf("HTTP status code (102)"),
}
resp, err = doBackoffRetry(client, fakeRequest, &Backoff{
Factor: 1.0,
Steps: 3,
RetriableHTTPStatusCodes: []int{http.StatusProcessing},
})
assert.NotNil(t, resp)
assert.Equal(t, 102, resp.StatusCode)
assert.Equal(t, expectedErr.Error(), err)
assert.Equal(t, 3, client.Attempts())
}

0 comments on commit 50c8f73

Please sign in to comment.