From bc69c362dbd3eb06774d0f6a60c5b151fa8fe94d Mon Sep 17 00:00:00 2001
From: andyzhangx
Date: Thu, 11 Apr 2024 03:35:13 +0000
Subject: [PATCH] fix: cache GetVolumeStats on Windows node

fix
---
 pkg/azuredisk/azure_common_darwin.go  |  2 +-
 pkg/azuredisk/azure_common_linux.go   |  2 +-
 pkg/azuredisk/azure_common_windows.go | 24 +++++++++++++++++++++++-
 pkg/azuredisk/azuredisk.go            | 20 +++++++++++++++-----
 pkg/azuredisk/nodeserver.go           |  2 +-
 pkg/azuredisk/nodeserver_v2.go        |  2 +-
 6 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/pkg/azuredisk/azure_common_darwin.go b/pkg/azuredisk/azure_common_darwin.go
index df34dff9f1..09be9dead4 100644
--- a/pkg/azuredisk/azure_common_darwin.go
+++ b/pkg/azuredisk/azure_common_darwin.go
@@ -119,6 +119,6 @@ func rescanAllVolumes(io azureutils.IOHandler) error {
 	return nil
 }
 
-func GetVolumeStats(ctx context.Context, m *mount.SafeFormatAndMount, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) {
+func (d *DriverCore) GetVolumeStats(ctx context.Context, m *mount.SafeFormatAndMount, volumeID, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) {
 	return []*csi.VolumeUsage{}, nil
 }
diff --git a/pkg/azuredisk/azure_common_linux.go b/pkg/azuredisk/azure_common_linux.go
index 537a163a0c..e1711bc644 100644
--- a/pkg/azuredisk/azure_common_linux.go
+++ b/pkg/azuredisk/azure_common_linux.go
@@ -269,7 +269,7 @@ func rescanAllVolumes(io azureutils.IOHandler) error {
 	return nil
 }
 
-func GetVolumeStats(_ context.Context, m *mount.SafeFormatAndMount, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) {
+func (d *DriverCore) GetVolumeStats(_ context.Context, m *mount.SafeFormatAndMount, _, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) {
 	var volUsages []*csi.VolumeUsage
 	_, err := os.Stat(target)
 	if err != nil {
diff --git a/pkg/azuredisk/azure_common_windows.go b/pkg/azuredisk/azure_common_windows.go
index 390912181b..ac5089bb6e 100644
--- a/pkg/azuredisk/azure_common_windows.go
+++ b/pkg/azuredisk/azure_common_windows.go
@@ -25,12 +25,14 @@ import (
 	"strconv"
 
 	"github.com/container-storage-interface/spec/lib/go/csi"
+	"google.golang.org/grpc/codes"
 	"google.golang.org/grpc/status"
 	"k8s.io/klog/v2"
 	"k8s.io/mount-utils"
 
 	"sigs.k8s.io/azuredisk-csi-driver/pkg/azureutils"
 	"sigs.k8s.io/azuredisk-csi-driver/pkg/mounter"
+	azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
 )
 
 func formatAndMount(source, target, fstype string, options []string, m *mount.SafeFormatAndMount) error {
@@ -162,9 +164,29 @@ func rescanAllVolumes(io azureutils.IOHandler) error {
 	return nil
 }
 
-func GetVolumeStats(ctx context.Context, m *mount.SafeFormatAndMount, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) {
+// GetVolumeStats returns the usage stats of the volume mounted at target.
+// A cached entry for volumeID is returned as-is; otherwise the stats are
+// fetched through the CSI proxy mounter and stored in the cache.
+func (d *DriverCore) GetVolumeStats(ctx context.Context, m *mount.SafeFormatAndMount, volumeID, target string, hostutil hostUtil) ([]*csi.VolumeUsage, error) {
+	// check if the volume stats is cached
+	cache, err := d.volStatsCache.Get(volumeID, azcache.CacheReadTypeDefault)
+	if err != nil {
+		return nil, status.Error(codes.Internal, err.Error())
+	}
+	if cache != nil {
+		volUsage := cache.(csi.VolumeUsage)
+		klog.V(6).Infof("NodeGetVolumeStats: volume stats for volume %s path %s is cached", volumeID, target)
+		return []*csi.VolumeUsage{&volUsage}, nil
+	}
+
 	if proxy, ok := m.Interface.(mounter.CSIProxyMounter); ok {
 		volUsage, err := proxy.GetVolumeStats(ctx, target)
+		if err != nil {
+			// on error volUsage is not usable: return before dereferencing or caching it
+			return []*csi.VolumeUsage{}, err
+		}
+		// cache the volume stats per volume
+		d.volStatsCache.Set(volumeID, *volUsage)
 		return []*csi.VolumeUsage{volUsage}, err
 	}
 	return []*csi.VolumeUsage{}, fmt.Errorf("could not cast to csi proxy class")
diff --git a/pkg/azuredisk/azuredisk.go b/pkg/azuredisk/azuredisk.go
index 04ba423f25..443ea7b0a5 100644
--- a/pkg/azuredisk/azuredisk.go
+++ b/pkg/azuredisk/azuredisk.go
@@ -73,6 +73,7 @@ type DriverOptions struct {
 	TrafficManagerPort           int64
 	AttachDetachInitialDelayInMs int64
 	VMSSCacheTTLInSeconds        int64
+	VolStatsCacheExpireInMinutes int64
 	VMType                       string
 	EnableWindowsHostProcess     bool
 	GetNodeIDFromIMDS            bool
@@ -120,11 +121,14 @@ type DriverCore struct {
 	enableTrafficManager         bool
 	trafficManagerPort           int64
 	vmssCacheTTLInSeconds        int64
+	volStatsCacheExpireInMinutes int64
 	attachDetachInitialDelayInMs int64
 	vmType                       string
 	enableWindowsHostProcess     bool
 	getNodeIDFromIMDS            bool
 	shouldWaitForSnapshotReady   bool
+	// a timed cache storing volume stats
+	volStatsCache azcache.Resource
 }
 
 // Driver is the v1 implementation of the Azure Disk CSI Driver.
@@ -162,6 +166,7 @@ func newDriverV1(options *DriverOptions) *Driver {
 	driver.enableTrafficManager = options.EnableTrafficManager
 	driver.trafficManagerPort = options.TrafficManagerPort
 	driver.vmssCacheTTLInSeconds = options.VMSSCacheTTLInSeconds
+	driver.volStatsCacheExpireInMinutes = options.VolStatsCacheExpireInMinutes
 	driver.vmType = options.VMType
 	driver.enableWindowsHostProcess = options.EnableWindowsHostProcess
 	driver.getNodeIDFromIMDS = options.GetNodeIDFromIMDS
@@ -172,13 +177,18 @@ func newDriverV1(options *DriverOptions) *Driver {
 
 	topologyKey = fmt.Sprintf("topology.%s/zone", driver.Name)
 
-	cache, err := azcache.NewTimedCache(5*time.Minute, func(key string) (interface{}, error) {
-		return nil, nil
-	}, false)
-	if err != nil {
+	getter := func(key string) (interface{}, error) { return nil, nil }
+	var err error
+	if driver.getDiskThrottlingCache, err = azcache.NewTimedCache(5*time.Minute, getter, false); err != nil {
+		klog.Fatalf("%v", err)
+	}
+
+	if driver.volStatsCacheExpireInMinutes <= 0 {
+		driver.volStatsCacheExpireInMinutes = 10 // default expire in 10 minutes
+	}
+	if driver.volStatsCache, err = azcache.NewTimedCache(time.Duration(driver.volStatsCacheExpireInMinutes)*time.Minute, getter, false); err != nil {
 		klog.Fatalf("%v", err)
 	}
-	driver.getDiskThrottlingCache = cache
 	return &driver
 }
 
diff --git a/pkg/azuredisk/nodeserver.go b/pkg/azuredisk/nodeserver.go
index a87e6af367..1fbc19f8f4 100644
--- a/pkg/azuredisk/nodeserver.go
+++ b/pkg/azuredisk/nodeserver.go
@@ -444,7 +444,7 @@ func (d *Driver) NodeGetVolumeStats(ctx context.Context, req *csi.NodeGetVolumeS
 		return nil, status.Error(codes.InvalidArgument, "NodeGetVolumeStats volume path was empty")
 	}
 
-	volUsage, err := GetVolumeStats(ctx, d.mounter, req.VolumePath, d.hostUtil)
+	volUsage, err := d.GetVolumeStats(ctx, d.mounter, req.VolumeId, req.VolumePath, d.hostUtil)
 	return &csi.NodeGetVolumeStatsResponse{
 		Usage: volUsage,
 	}, err
diff --git a/pkg/azuredisk/nodeserver_v2.go b/pkg/azuredisk/nodeserver_v2.go
index d35a1ffd2b..718e17de7b 100644
--- a/pkg/azuredisk/nodeserver_v2.go
+++ b/pkg/azuredisk/nodeserver_v2.go
@@ -405,7 +405,7 @@ func (d *DriverV2) NodeGetVolumeStats(ctx context.Context, req *csi.NodeGetVolum
 		return nil, status.Error(codes.InvalidArgument, "NodeGetVolumeStats volume path was empty")
 	}
 
-	volUsage, err := GetVolumeStats(ctx, d.mounter, req.VolumePath, d.hostUtil)
+	volUsage, err := d.GetVolumeStats(ctx, d.mounter, req.VolumeId, req.VolumePath, d.hostUtil)
 	return &csi.NodeGetVolumeStatsResponse{
 		Usage: volUsage,
 	}, err