Skip to content

Commit

Permalink
Retry if response StatusCode 200 and ContentLength -1
Browse files Browse the repository at this point in the history
ARM will possibly return StatusCode 200 and ContentLength
-1 which is a bug. However, cloudprovider can handle it
better by retrying.

Signed-off-by: Zhecheng Li <zhechengli@microsoft.com>
  • Loading branch information
lzhecheng committed Sep 15, 2022
1 parent 532f956 commit 61983c0
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 2 deletions.
15 changes: 14 additions & 1 deletion pkg/azureclients/armclient/azure_armclient_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (

"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/to"
"github.com/stretchr/testify/assert"

"sigs.k8s.io/cloud-provider-azure/pkg/consts"
Expand Down Expand Up @@ -78,16 +79,25 @@ func TestSendFailureRegionalRetry(t *testing.T) {
description string
globalServerErrMsg string
globalServerCode int
contentLength *string
}{
{
"RegionalRetry",
"{\"error\":{\"code\":\"ResourceGroupNotFound\"}}",
http.StatusInternalServerError,
nil,
},
{
"ReplicationLatency",
"ReplicationLatency-Content-Length-0",
"{}",
http.StatusOK,
to.StringPtr("0"),
},
{
"ReplicationLatency-Content-Length-minus-1",
"{}",
http.StatusOK,
to.StringPtr("-1"),
},
}

Expand All @@ -101,6 +111,9 @@ func TestSendFailureRegionalRetry(t *testing.T) {
}))

globalServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if tc.contentLength != nil {
w.Header().Set("Content-Length", *tc.contentLength)
}
http.Error(w, tc.globalServerErrMsg, tc.globalServerCode)
}))
azConfig := azureclients.ClientConfig{Backoff: &retry.Backoff{Steps: 3}, UserAgent: "test", Location: "eastus"}
Expand Down
21 changes: 20 additions & 1 deletion pkg/azureclients/armclient/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,11 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
// Hack: retry the regional ARM endpoint in case of ARM traffic split and arm resource group replication is too slow
// Empty content and 2xx http status code are returned in this case.
// Issue: https://github.com/kubernetes-sigs/cloud-provider-azure/issues/1296
// It needs retry if ContentLength == -1, StatusCode 200 and empty body.
emptyResp := (response.ContentLength == 0 || trimmed == "" || trimmed == "{}") && response.StatusCode >= 200 && response.StatusCode < 300
if response.ContentLength == -1 && response.StatusCode >= 200 && response.StatusCode < 300 && (trimmed == "" || trimmed == "{}") {
emptyResp = true
}
if !emptyResp {
if rerr == nil || response.StatusCode == http.StatusNotFound || c.regionalEndpoint == "" {
return response, rerr
Expand All @@ -143,6 +147,7 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
}
}

// Do regional request
currentHost := request.URL.Host
if request.Host != "" {
currentHost = request.Host
Expand All @@ -158,6 +163,7 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
klog.V(6).Infof("Send.sendRegionalRequest on ResourceGroupNotFound error. Retrying regional host: %s", html.EscapeString(request.Host))

regionalResponse, regionalError := s.Do(request)

// only use the result if the regional request actually goes through and returns 2xx status code, for two reasons:
// 1. the retry on regional ARM host approach is a hack.
// 2. the concatenated regional uri could be wrong as the rule is not officially declared by ARM.
Expand All @@ -167,9 +173,22 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
regionalErrStr = regionalError.Error()
}

klog.V(6).Infof("Send.sendRegionalRequest failed to get response from regional host, error: '%s'. Ignoring the result.", regionalErrStr)
klog.V(6).Infof("Send.sendRegionalRequest failed to get response from regional host, error: %q. Ignoring the result.", regionalErrStr)
return response, rerr
}

bodyBytes, _ = ioutil.ReadAll(regionalResponse.Body)
defer func() {
regionalResponse.Body = ioutil.NopCloser(bytes.NewBuffer(bodyBytes))
}()
bodyString = string(bodyBytes)
trimmed = strings.TrimSpace(bodyString)
if regionalResponse.ContentLength == -1 && regionalResponse.StatusCode >= 200 && regionalResponse.StatusCode < 300 && (trimmed == "" || trimmed == "{}") {
contentLengthErrStr := fmt.Sprintf("empty response with ContentLength %d and StatusCode %d", regionalResponse.ContentLength, regionalResponse.StatusCode)
klog.V(6).Infof(contentLengthErrStr)
return response, fmt.Errorf(contentLengthErrStr)
}

return regionalResponse, regionalError
})
}
Expand Down

0 comments on commit 61983c0

Please sign in to comment.