Skip to content

Commit

Permalink
Retry if response StatusCode 200 and ContentLength -1
Browse files Browse the repository at this point in the history
ARM will possibly return StatusCode 200 and ContentLength
-1 which is a bug. However, cloudprovider can handle it
better by retrying.

Signed-off-by: Zhecheng Li <zhechengli@microsoft.com>
  • Loading branch information
lzhecheng committed Sep 16, 2022
1 parent 10ca8f0 commit 9053575
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 8 deletions.
21 changes: 19 additions & 2 deletions pkg/azureclients/armclient/azure_armclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
// Hack: retry the regional ARM endpoint in case of ARM traffic split and arm resource group replication is too slow
// Empty content and 2xx http status code are returned in this case.
// Issue: https://github.com/kubernetes-sigs/cloud-provider-azure/issues/1296
// Such situation also needs retrying that ContentLength is -1, StatusCode is 200 and an empty body is returned.
emptyResp := (response.ContentLength == 0 || trimmed == "" || trimmed == "{}") && response.StatusCode >= 200 && response.StatusCode < 300
if !emptyResp {
if rerr == nil || response.StatusCode == http.StatusNotFound || c.regionalEndpoint == "" {
Expand All @@ -166,13 +167,14 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
}
}

// Do regional request
currentHost := request.URL.Host
if request.Host != "" {
currentHost = request.Host
}

if strings.HasPrefix(strings.ToLower(currentHost), c.regionalEndpoint) {
klog.V(5).Infof("Send.sendRequest: current host %s is regional host. Skip retrying regional host.", html.EscapeString(currentHost))
klog.V(5).Infof("Send.sendRequest: current host %q is regional host. Skip retrying regional host.", bodyBytes, html.EscapeString(currentHost))
return response, rerr
}

Expand All @@ -190,9 +192,24 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
regionalErrStr = regionalError.Error()
}

klog.V(5).Infof("Send.sendRegionalRequest failed to get response from regional host, error: '%s'. Ignoring the result.", regionalErrStr)
klog.V(5).Infof("Send.sendRegionalRequest failed to get response from regional host, error: %q. Ignoring the result.", regionalErrStr)
return response, rerr
}

// Do the same check on regional response just like the global one
bodyBytes, _ = ioutil.ReadAll(regionalResponse.Body)
defer func() {
regionalResponse.Body = ioutil.NopCloser(bytes.NewBuffer(bodyBytes))
}()
bodyString = string(bodyBytes)
trimmed = strings.TrimSpace(bodyString)
emptyResp = (regionalResponse.ContentLength == 0 || trimmed == "" || trimmed == "{}") && regionalResponse.StatusCode >= 200 && regionalResponse.StatusCode < 300
if emptyResp {
contentLengthErrStr := fmt.Sprintf("empty response with trimmed body %q, ContentLength %d and StatusCode %d", trimmed, regionalResponse.ContentLength, regionalResponse.StatusCode)
klog.Errorf(contentLengthErrStr)
return response, fmt.Errorf(contentLengthErrStr)
}

return regionalResponse, regionalError
})
}
Expand Down
25 changes: 19 additions & 6 deletions pkg/azureclients/armclient/azure_armclient_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (

"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/to"
"github.com/stretchr/testify/assert"

"sigs.k8s.io/cloud-provider-azure/pkg/consts"
Expand Down Expand Up @@ -76,19 +77,28 @@ func TestSend(t *testing.T) {
}
func TestSendFailureRegionalRetry(t *testing.T) {
testcases := []struct {
description string
globalServerErrMsg string
globalServerCode int
description string
globalServerErrMsg string
globalServerCode int
globalServerContentLength *string
}{
{
"RegionalRetry",
"{\"error\":{\"code\":\"ResourceGroupNotFound\"}}",
http.StatusInternalServerError,
to.StringPtr("100"),
},
{
"ReplicationLatency",
"ReplicationLatency-Content-Length-0",
"{}",
http.StatusOK,
to.StringPtr("0"),
},
{
"ReplicationLatency-Content-Length-minus-1",
"{}",
http.StatusOK,
to.StringPtr("-1"),
},
}

Expand All @@ -97,11 +107,14 @@ func TestSendFailureRegionalRetry(t *testing.T) {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
assert.Equal(t, "GET", r.Method)
w.WriteHeader(http.StatusOK)
_, err := w.Write([]byte("{}"))
_, err := w.Write([]byte("{\"a\": \"b\"}"))
assert.NoError(t, err)
}))

globalServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if tc.globalServerContentLength != nil {
w.Header().Set("Content-Length", *tc.globalServerContentLength)
}
http.Error(w, tc.globalServerErrMsg, tc.globalServerCode)
}))
azConfig := azureclients.ClientConfig{Backoff: &retry.Backoff{Steps: 3}, UserAgent: "test", Location: "eastus"}
Expand All @@ -125,7 +138,7 @@ func TestSendFailureRegionalRetry(t *testing.T) {
assert.NoError(t, err)

response, rerr := armClient.Send(ctx, request)
assert.Nil(t, rerr)
assert.Nil(t, rerr, rerr.Error())
assert.Equal(t, http.StatusOK, response.StatusCode)
assert.Equal(t, targetURL.Host, response.Request.URL.Host)
})
Expand Down

0 comments on commit 9053575

Please sign in to comment.