Skip to content

Commit

Permalink
chore: respect RetryAfter in throttling condition
Browse files Browse the repository at this point in the history
  • Loading branch information
andyzhangx committed Feb 19, 2024
1 parent 43e9b38 commit 388d476
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 14 deletions.
1 change: 1 addition & 0 deletions pkg/azurefile/azurefile.go
Expand Up @@ -151,6 +151,7 @@ const (
// define different sleep time when hit throttling
accountOpThrottlingSleepSec = 16
fileOpThrottlingSleepSec = 180
maxThrottlingSleepSec = 1200

defaultAccountNamePrefix = "f"

Expand Down
16 changes: 5 additions & 11 deletions pkg/azurefile/controllerserver.go
Expand Up @@ -34,7 +34,6 @@ import (
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"

Expand Down Expand Up @@ -432,16 +431,11 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
accountName = cache.(string)
} else {
d.volLockMap.LockEntry(lockKey)
err = wait.ExponentialBackoff(d.cloud.RequestBackoff(), func() (bool, error) {
var retErr error
accountName, accountKey, retErr = d.cloud.EnsureStorageAccount(ctx, accountOptions, defaultAccountNamePrefix)
if isRetriableError(retErr) {
klog.Warningf("EnsureStorageAccount(%s) failed with error(%v), waiting for retrying", account, retErr)
sleepIfThrottled(retErr, accountOpThrottlingSleepSec)
return false, nil
}
return true, retErr
})
accountName, accountKey, err = d.cloud.EnsureStorageAccount(ctx, accountOptions, defaultAccountNamePrefix)
if isRetriableError(err) {
klog.Warningf("EnsureStorageAccount(%s) failed with error(%v), waiting for retrying", account, err)
sleepIfThrottled(err, accountOpThrottlingSleepSec)
}
d.volLockMap.UnlockEntry(lockKey)
if err != nil {
return nil, status.Errorf(codes.Internal, "failed to ensure storage account: %v", err)
Expand Down
32 changes: 29 additions & 3 deletions pkg/azurefile/utils.go
Expand Up @@ -19,6 +19,7 @@ package azurefile
import (
"fmt"
"os"
"regexp"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -134,11 +135,36 @@ func isRetriableError(err error) bool {
return false
}

func sleepIfThrottled(err error, sleepSec int) {
func sleepIfThrottled(err error, defaultSleepSec int) {
if err == nil {
return
}
if strings.Contains(strings.ToLower(err.Error()), strings.ToLower(tooManyRequests)) || strings.Contains(strings.ToLower(err.Error()), clientThrottled) {
klog.Warningf("sleep %d more seconds, waiting for throttling complete", sleepSec)
time.Sleep(time.Duration(sleepSec) * time.Second)
retryAfter := getRetryAfterSeconds(err)
if retryAfter == 0 {
retryAfter = defaultSleepSec
}
klog.Warningf("sleep %d more seconds, waiting for throttling complete", retryAfter)
time.Sleep(time.Duration(retryAfter) * time.Second)
}
}

// getRetryAfterSeconds returns the number of seconds to wait from the error message
func getRetryAfterSeconds(err error) int {
if err == nil {
return 0
}
re := regexp.MustCompile(`RetryAfter: (\d+)s`)
match := re.FindStringSubmatch(err.Error())
if len(match) > 1 {
if retryAfter, err := strconv.Atoi(match[1]); err == nil {
if retryAfter > maxThrottlingSleepSec {
return maxThrottlingSleepSec
}
return retryAfter
}
}
return 0
}

func useDataPlaneAPI(volContext map[string]string) bool {
Expand Down
41 changes: 41 additions & 0 deletions pkg/azurefile/utils_test.go
Expand Up @@ -325,7 +325,48 @@ func TestSleepIfThrottled(t *testing.T) {
if elapsed.Seconds() < 10 {
t.Errorf("Expected sleep time(%d), Actual sleep time(%f)", 10, elapsed.Seconds())
}
}

func TestGetRetryAfterSeconds(t *testing.T) {
tests := []struct {
desc string
err error
expected int
}{
{
desc: "nil error",
err: nil,
expected: 0,
},
{
desc: "no match",
err: errors.New("no match"),
expected: 0,
},
{
desc: "match",
err: errors.New("RetryAfter: 10s"),
expected: 10,
},
{
desc: "match error message",
err: errors.New("could not list storage accounts for account type Premium_LRS: Retriable: true, RetryAfter: 217s, HTTPStatusCode: 0, RawError: azure cloud provider throttled for operation StorageAccountListByResourceGroup with reason \"client throttled\""),
expected: 217,
},
{
desc: "match error message exceeds 1200s",
err: errors.New("could not list storage accounts for account type Premium_LRS: Retriable: true, RetryAfter: 2170s, HTTPStatusCode: 0, RawError: azure cloud provider throttled for operation StorageAccountListByResourceGroup with reason \"client throttled\""),
expected: maxThrottlingSleepSec,
},
}

for _, test := range tests {
result := getRetryAfterSeconds(test.err)
if result != test.expected {
t.Errorf("desc: (%s), input: err(%v), getRetryAfterSeconds returned with int(%d), not equal to expected(%d)",
test.desc, test.err, result, test.expected)
}
}
}

func TestUseDataPlaneAPI(t *testing.T) {
Expand Down

0 comments on commit 388d476

Please sign in to comment.