UPSTREAM: <carry>: delay queuing deletion for PV to allow nodes some time to unmount

UPSTREAM: <carry>: Fix sync of PV deletion in PV controller

Always queue PV deletion events immediately, without any wait. This does not
affect dynamic de-provisioning / deletion of volumes, which happens on PVC
deletion.

This de-flakes unit tests, which expect PV deletion to be processed without a
long wait.

This updates carry patch b24f93e. It still waits for 21 seconds after *PVC*
deletion!
UPSTREAM: <carry>: delay queuing deletion for PV to allow nodes some time to unmount

openshift-rebase(v1.24):source=c5fd3449734
deads2k authored and soltysh committed Aug 22, 2022
1 parent cf6f444 commit 14f860c
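
The change hinges on the difference between Add and AddAfter on a client-go workqueue: the former enqueues an item immediately, the latter defers it. The following standalone sketch only illustrates that behavior; the item names are made up and it is not the controller code, which appears in the diff below.

```go
// Standalone sketch, not controller code: a rate-limiting workqueue also
// satisfies DelayingInterface, so items can be enqueued immediately (Add)
// or after a delay (AddAfter). Item names here are illustrative.
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	q := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "volumes")
	defer q.ShutDown()

	// PV deletion event: queued immediately, so unit tests see it processed right away.
	q.Add("pv-deleted-now")

	// After PVC deletion: give nodes ~21s to unmount/detach before syncing the bound PV.
	q.AddAfter("pv-bound-to-deleted-pvc", 21*time.Second)

	item, _ := q.Get() // blocks until an item is available; delayed items surface only after their delay
	fmt.Println("processing", item)
	q.Done(item)
}
```
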
Showing 2 changed files with 20 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pkg/controller/volume/persistentvolume/pv_controller.go
@@ -204,7 +204,7 @@ type PersistentVolumeController struct {
 	// however overall speed of multi-worker controller would be lower than if
 	// it runs single thread only.
 	claimQueue  *workqueue.Type
-	volumeQueue *workqueue.Type
+	volumeQueue workqueue.RateLimitingInterface
 
 	// Map of scheduled/running operations.
 	runningOperations goroutinemap.GoRoutineMap
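
For context on the type change above: AddAfter is not part of the plain workqueue.Interface that *workqueue.Type implements, so the field moves to RateLimitingInterface, which embeds DelayingInterface. The listing below is an abridged paraphrase of the interfaces in k8s.io/client-go/util/workqueue around this rebase, reproduced only to show where AddAfter lives.

```go
// Abridged paraphrase of the k8s.io/client-go/util/workqueue interfaces around
// this rebase, shown only to explain the field type change: AddAfter lives on
// DelayingInterface, which RateLimitingInterface embeds, while the plain
// Interface (implemented by *workqueue.Type) has no way to delay an item.
package sketch

import "time"

type Interface interface {
	Add(item interface{})
	Len() int
	Get() (item interface{}, shutdown bool)
	Done(item interface{})
	ShutDown()
}

type DelayingInterface interface {
	Interface
	AddAfter(item interface{}, duration time.Duration) // what the 21s delay needs
}

type RateLimitingInterface interface {
	DelayingInterface
	AddRateLimited(item interface{})
	Forget(item interface{})
	NumRequeues(item interface{}) int
}
```
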
21 changes: 19 additions & 2 deletions pkg/controller/volume/persistentvolume/pv_controller_base.go
@@ -100,7 +100,7 @@ func NewController(p ControllerParameters) (*PersistentVolumeController, error)
 		createProvisionedPVRetryCount: createProvisionedPVRetryCount,
 		createProvisionedPVInterval: createProvisionedPVInterval,
 		claimQueue: workqueue.NewNamed("claims"),
-		volumeQueue: workqueue.NewNamed("volumes"),
+		volumeQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "volumes"),
 		resyncPeriod: p.SyncPeriod,
 		operationTimestamps: metrics.NewOperationStartTimeCache(),
 	}
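
A note on the constructor arguments above, with a small sketch: the rate limiter passed to NewNamedRateLimitingQueue only governs AddRateLimited requeues; Add and AddAfter (including the 21-second delay introduced by this patch) bypass it. The item name below is made up, and nothing here implies the PV controller itself calls AddRateLimited.

```go
// Sketch only: the rate limiter passed above governs AddRateLimited requeues,
// not Add or AddAfter. The item name is made up; this does not claim the PV
// controller uses AddRateLimited anywhere.
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func main() {
	q := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "volumes")
	defer q.ShutDown()

	q.AddAfter("some-pv", 21*time.Second) // fixed delay, rate limiter not consulted

	q.AddRateLimited("some-pv") // delay grows with per-item backoff on repeated calls
	fmt.Println("requeues so far:", q.NumRequeues("some-pv"))
	q.Forget("some-pv") // reset the per-item backoff once a sync succeeds
}
```
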
@@ -197,6 +197,20 @@ func (ctrl *PersistentVolumeController) enqueueWork(queue workqueue.Interface, o
 	queue.Add(objName)
 }
 
+func (ctrl *PersistentVolumeController) enqueueWorkAfter(queue workqueue.DelayingInterface, obj interface{}, delay time.Duration) {
+	// Beware of "xxx deleted" events
+	if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil {
+		obj = unknown.Obj
+	}
+	objName, err := controller.KeyFunc(obj)
+	if err != nil {
+		klog.Errorf("failed to get key from object: %v", err)
+		return
+	}
+	klog.V(5).Infof("enqueued %q for sync", objName)
+	queue.AddAfter(objName, delay)
+}
+
 func (ctrl *PersistentVolumeController) storeVolumeUpdate(volume interface{}) (bool, error) {
 	return storeObjectUpdate(ctrl.volumes.store, volume, "volume")
 }
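
The DeletedFinalStateUnknown check in the new helper guards against informer "tombstone" delete notifications, which wrap the last known object when a watch missed the final delete. The sketch below is illustrative only (the handler and object are made up); it is not the controller's actual event handler.

```go
// Illustrative only: how a cache.DeletedFinalStateUnknown tombstone reaches a
// delete handler when an informer missed the object's final state, which is why
// enqueueWorkAfter unwraps unknown.Obj before computing the key. The handler
// below is made up for the example.
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/cache"
)

func handleClaimDelete(obj interface{}) {
	if unknown, ok := obj.(cache.DeletedFinalStateUnknown); ok && unknown.Obj != nil {
		obj = unknown.Obj // unwrap the tombstone to recover the last known object
	}
	if pvc, ok := obj.(*v1.PersistentVolumeClaim); ok {
		fmt.Println("claim deleted:", pvc.Namespace+"/"+pvc.Name)
	}
}

func main() {
	// Simulate the tombstone an informer can deliver after a missed watch event.
	handleClaimDelete(cache.DeletedFinalStateUnknown{
		Key: "default/my-claim",
		Obj: &v1.PersistentVolumeClaim{
			ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "my-claim"},
		},
	})
}
```
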
@@ -298,8 +312,11 @@ func (ctrl *PersistentVolumeController) deleteClaim(claim *v1.PersistentVolumeCl
 	// sync the volume when its claim is deleted. Explicitly sync'ing the
 	// volume here in response to claim deletion prevents the volume from
 	// waiting until the next sync period for its Release.
+	// delay queuing the volume to allow some time for nodes to detach the volume from the node. The time chosen here
+	// is to hopefully be short enough that e2e tests still pass and long enough that most PVs stop hitting the failure
+	// errors.
 	klog.V(5).Infof("deleteClaim[%q]: scheduling sync of volume %s", claimKey, volumeName)
-	ctrl.volumeQueue.Add(volumeName)
+	ctrl.volumeQueue.AddAfter(volumeName, 21*time.Second)
 }
 
 // Run starts all of this controller's control loops
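
Finally, a shape-only sketch of how a worker typically drains a RateLimitingInterface, to show where an item queued with AddAfter eventually re-enters processing once its delay elapses. The worker and sync function below are assumptions for illustration; the PV controller's real volumeWorker differs in detail.

```go
// Shape-only sketch of a worker draining a RateLimitingInterface; the PV
// controller's real volumeWorker differs in detail. Get blocks until an item
// is available, so an item queued with AddAfter re-enters here after its delay.
package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/workqueue"
)

func worker(queue workqueue.RateLimitingInterface, sync func(key string) error) {
	for {
		item, shutdown := queue.Get()
		if shutdown {
			return
		}
		if err := sync(item.(string)); err != nil {
			fmt.Println("sync failed:", err)
		}
		queue.Done(item)
	}
}

func main() {
	q := workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "volumes")
	q.Add("pv-example")

	go worker(q, func(key string) error {
		fmt.Println("syncing", key)
		return nil
	})

	time.Sleep(100 * time.Millisecond) // sketch only: let the worker run briefly
	q.ShutDown()
}
```
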
