Skip to content

Commit

Permalink
Retry if response StatusCode 200 and ContentLength -1
Browse files Browse the repository at this point in the history
ARM will possibly return StatusCode 200 and ContentLength
-1 which is a bug. However, cloudprovider can handle it
better by retrying.

Signed-off-by: Zhecheng Li <zhechengli@microsoft.com>
  • Loading branch information
lzhecheng committed Sep 15, 2022
1 parent 532f956 commit d7b0041
Showing 1 changed file with 10 additions and 3 deletions.
13 changes: 10 additions & 3 deletions pkg/azureclients/armclient/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
// Hack: retry the regional ARM endpoint in case of ARM traffic split and arm resource group replication is too slow
// Empty content and 2xx http status code are returned in this case.
// Issue: https://github.com/kubernetes-sigs/cloud-provider-azure/issues/1296
emptyResp := (response.ContentLength == 0 || trimmed == "" || trimmed == "{}") && response.StatusCode >= 200 && response.StatusCode < 300
// Update: ContentLength == -1 is also considered because ARM can possibly return ContentLength -1 and StatusCode 200.
emptyResp := (response.ContentLength == 0 || response.ContentLength == -1 || trimmed == "" || trimmed == "{}") && response.StatusCode >= 200 && response.StatusCode < 300
if !emptyResp {
if rerr == nil || response.StatusCode == http.StatusNotFound || c.regionalEndpoint == "" {
return response, rerr
Expand Down Expand Up @@ -158,7 +159,7 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
klog.V(6).Infof("Send.sendRegionalRequest on ResourceGroupNotFound error. Retrying regional host: %s", html.EscapeString(request.Host))

regionalResponse, regionalError := s.Do(request)
// only use the result if the regional request actually goes through and returns 2xx status code, for two reasons:
// only use the result if the regional request actually goes through, returns 2xx status code and has valid ContentLength (not -1 or 0), for two reasons:
// 1. the retry on regional ARM host approach is a hack.
// 2. the concatenated regional uri could be wrong as the rule is not officially declared by ARM.
if regionalResponse == nil || regionalResponse.StatusCode > 299 {
Expand All @@ -167,9 +168,15 @@ func DoHackRegionalRetryDecorator(c *Client) autorest.SendDecorator {
regionalErrStr = regionalError.Error()
}

klog.V(6).Infof("Send.sendRegionalRequest failed to get response from regional host, error: '%s'. Ignoring the result.", regionalErrStr)
klog.V(6).Infof("Send.sendRegionalRequest failed to get response from regional host, error: %q. Ignoring the result.", regionalErrStr)
return response, rerr
}
if regionalResponse.StatusCode >= 200 && regionalResponse.StatusCode < 300 && (regionalResponse.ContentLength == -1 || regionalResponse.ContentLength == 0) {
contentLengthErrStr := fmt.Sprintf("empty response with ContentLength %d and StatusCode %d", regionalResponse.ContentLength, regionalResponse.StatusCode)
klog.V(6).Infof(contentLengthErrStr)
return response, fmt.Errorf(contentLengthErrStr)
}

return regionalResponse, regionalError
})
}
Expand Down

0 comments on commit d7b0041

Please sign in to comment.