From cdd304bdb17adb770d13ed659e29dc6d2f849e8e Mon Sep 17 00:00:00 2001 From: andyzhangx Date: Tue, 26 Mar 2024 10:33:06 +0000 Subject: [PATCH] fix: refine check disk lun collision logic --- pkg/azuredisk/azuredisk.go | 13 ++++++++----- pkg/azuredisk/controllerserver.go | 8 ++++++-- pkg/azuredisk/fake_azuredisk.go | 1 + 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/pkg/azuredisk/azuredisk.go b/pkg/azuredisk/azuredisk.go index 4669f9c7a1..01727ed17a 100644 --- a/pkg/azuredisk/azuredisk.go +++ b/pkg/azuredisk/azuredisk.go @@ -138,6 +138,8 @@ type Driver struct { volumeLocks *volumehelper.VolumeLocks // a timed cache for throttling throttlingCache azcache.Resource + // a timed cache for disk lun collision check throttling + checkDiskLunThrottlingCache azcache.Resource } // newDriverV1 Creates a NewCSIDriver object. Assumes vendor version is equal to driver version & @@ -180,13 +182,14 @@ func newDriverV1(options *DriverOptions) *Driver { topologyKey = fmt.Sprintf("topology.%s/zone", driver.Name) - cache, err := azcache.NewTimedCache(5*time.Minute, func(key string) (interface{}, error) { - return nil, nil - }, false) - if err != nil { + getter := func(key string) (interface{}, error) { return nil, nil } + var err error + if driver.throttlingCache, err = azcache.NewTimedCache(5*time.Minute, getter, false); err != nil { + klog.Fatalf("%v", err) + } + if driver.checkDiskLunThrottlingCache, err = azcache.NewTimedCache(30*time.Minute, getter, false); err != nil { klog.Fatalf("%v", err) } - driver.throttlingCache = cache return &driver } diff --git a/pkg/azuredisk/controllerserver.go b/pkg/azuredisk/controllerserver.go index 32bdafee43..d1e17e2b13 100644 --- a/pkg/azuredisk/controllerserver.go +++ b/pkg/azuredisk/controllerserver.go @@ -563,6 +563,10 @@ func (d *Driver) ValidateVolumeCapabilities(ctx context.Context, req *csi.Valida func (d *Driver) getOccupiedLunsFromNode(ctx context.Context, nodeName types.NodeName, diskURI string) []int { var occupiedLuns []int if d.checkDiskLUNCollision && !d.isCheckDiskLunThrottled() { + timer := time.AfterFunc(checkDiskLunThrottleLatency, func() { + klog.Warningf("checkDiskLun(%s) on node %s took longer than %v, disable disk lun check temporarily", diskURI, nodeName, checkDiskLunThrottleLatency) + d.checkDiskLunThrottlingCache.Set(consts.CheckDiskLunThrottlingKey, "") + }) now := time.Now() if usedLunsFromVA, err := d.getUsedLunsFromVolumeAttachments(ctx, string(nodeName)); err == nil { if len(usedLunsFromVA) > 0 { @@ -582,9 +586,9 @@ func (d *Driver) getOccupiedLunsFromNode(ctx context.Context, nodeName types.Nod } latency := time.Since(now) if latency > checkDiskLunThrottleLatency { - klog.Warningf("checkDiskLun(%s) on node %s took %v (limit: %v), disable disk lun check temporarily", diskURI, nodeName, latency, checkDiskLunThrottleLatency) - d.throttlingCache.Set(consts.CheckDiskLunThrottlingKey, "") + klog.Warningf("checkDiskLun(%s) on node %s took %v (limit: %v)", diskURI, nodeName, latency, checkDiskLunThrottleLatency) } else { + timer.Stop() // cancel the timer klog.V(6).Infof("checkDiskLun(%s) on node %s took %v", diskURI, nodeName, latency) } } diff --git a/pkg/azuredisk/fake_azuredisk.go b/pkg/azuredisk/fake_azuredisk.go index 3608503c9c..acac8572d9 100644 --- a/pkg/azuredisk/fake_azuredisk.go +++ b/pkg/azuredisk/fake_azuredisk.go @@ -135,6 +135,7 @@ func newFakeDriverV1(t *testing.T) (*fakeDriverV1, error) { return nil, err } driver.throttlingCache = cache + driver.checkDiskLunThrottlingCache = cache driver.deviceHelper = mockoptimization.NewMockInterface(ctrl) driver.AddControllerServiceCapabilities(