Skip to content

Commit

Permalink
Merge pull request #89240 from verult/automated-cherry-pick-of-#88678…
Browse files Browse the repository at this point in the history
…-upstream-release-1.16

Automated cherry pick of #88678: Parallelize attach operations across different nodes for
  • Loading branch information
k8s-ci-robot committed Apr 9, 2020
2 parents 0ff2c2a + 5efc3a3 commit dbf25c6
Show file tree
Hide file tree
Showing 9 changed files with 795 additions and 293 deletions.
2 changes: 1 addition & 1 deletion pkg/controller/volume/attachdetach/reconciler/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ go_library(
"//pkg/controller/volume/attachdetach/statusupdater:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/util/goroutinemap/exponentialbackoff:go_default_library",
"//pkg/volume:go_default_library",
"//pkg/volume/util:go_default_library",
"//pkg/volume/util/operationexecutor:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
Expand Down
85 changes: 36 additions & 49 deletions pkg/controller/volume/attachdetach/reconciler/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"strings"
"time"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
Expand All @@ -34,7 +34,7 @@ import (
"k8s.io/kubernetes/pkg/controller/volume/attachdetach/statusupdater"
kevents "k8s.io/kubernetes/pkg/kubelet/events"
"k8s.io/kubernetes/pkg/util/goroutinemap/exponentialbackoff"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/util"
"k8s.io/kubernetes/pkg/volume/util/operationexecutor"
)

Expand Down Expand Up @@ -134,42 +134,6 @@ func (rc *reconciler) syncStates() {
rc.attacherDetacher.VerifyVolumesAreAttached(volumesPerNode, rc.actualStateOfWorld)
}

// isMultiAttachForbidden checks if attaching this volume to multiple nodes is definitely not allowed/possible.
// In its current form, this function can only reliably say for which volumes it's definitely forbidden. If it returns
// false, it is not guaranteed that multi-attach is actually supported by the volume type and we must rely on the
// attacher to fail fast in such cases.
// Please see https://github.com/kubernetes/kubernetes/issues/40669 and https://github.com/kubernetes/kubernetes/pull/40148#discussion_r98055047
func (rc *reconciler) isMultiAttachForbidden(volumeSpec *volume.Spec) bool {
if volumeSpec.Volume != nil {
// Check for volume types which are known to fail slow or cause trouble when trying to multi-attach
if volumeSpec.Volume.AzureDisk != nil ||
volumeSpec.Volume.Cinder != nil {
return true
}
}

// Only if this volume is a persistent volume, we have reliable information on whether it's allowed or not to
// multi-attach. We trust in the individual volume implementations to not allow unsupported access modes
if volumeSpec.PersistentVolume != nil {
// Check for persistent volume types which do not fail when trying to multi-attach
if len(volumeSpec.PersistentVolume.Spec.AccessModes) == 0 {
// No access mode specified so we don't know for sure. Let the attacher fail if needed
return false
}

// check if this volume is allowed to be attached to multiple PODs/nodes, if yes, return false
for _, accessMode := range volumeSpec.PersistentVolume.Spec.AccessModes {
if accessMode == v1.ReadWriteMany || accessMode == v1.ReadOnlyMany {
return false
}
}
return true
}

// we don't know if it's supported or not and let the attacher fail later in cases it's not supported
return false
}

func (rc *reconciler) reconcile() {
// Detaches are triggered before attaches so that volumes referenced by
// pods that are rescheduled to a different node are detached first.
Expand All @@ -178,13 +142,23 @@ func (rc *reconciler) reconcile() {
for _, attachedVolume := range rc.actualStateOfWorld.GetAttachedVolumes() {
if !rc.desiredStateOfWorld.VolumeExists(
attachedVolume.VolumeName, attachedVolume.NodeName) {
// Don't even try to start an operation if there is already one running
// Check whether there already exist an operation pending, and don't even
// try to start an operation if there is already one running.
// This check must be done before we do any other checks, as otherwise the other checks
// may pass while at the same time the volume leaves the pending state, resulting in
// double detach attempts
if rc.attacherDetacher.IsOperationPending(attachedVolume.VolumeName, "") {
klog.V(10).Infof("Operation for volume %q is already running. Can't start detach for %q", attachedVolume.VolumeName, attachedVolume.NodeName)
continue
// The operation key format is different depending on whether the volume
// allows multi attach across different nodes.
if util.IsMultiAttachAllowed(attachedVolume.VolumeSpec) {
if rc.attacherDetacher.IsOperationPending(attachedVolume.VolumeName, "" /* podName */, attachedVolume.NodeName) {
klog.V(10).Infof("Operation for volume %q is already running for node %q. Can't start detach", attachedVolume.VolumeName, attachedVolume.NodeName)
continue
}
} else {
if rc.attacherDetacher.IsOperationPending(attachedVolume.VolumeName, "" /* podName */, "" /* nodeName */) {
klog.V(10).Infof("Operation for volume %q is already running in the cluster. Can't start detach for %q", attachedVolume.VolumeName, attachedVolume.NodeName)
continue
}
}

// Set the detach request time
Expand Down Expand Up @@ -260,15 +234,27 @@ func (rc *reconciler) attachDesiredVolumes() {
rc.actualStateOfWorld.ResetDetachRequestTime(volumeToAttach.VolumeName, volumeToAttach.NodeName)
continue
}
// Don't even try to start an operation if there is already one running
if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "") {
if klog.V(10) {
klog.Infof("Operation for volume %q is already running. Can't start attach for %q", volumeToAttach.VolumeName, volumeToAttach.NodeName)

if util.IsMultiAttachAllowed(volumeToAttach.VolumeSpec) {

// Don't even try to start an operation if there is already one running for the given volume and node.
if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "" /* podName */, volumeToAttach.NodeName) {
if klog.V(10) {
klog.Infof("Operation for volume %q is already running for node %q. Can't start attach", volumeToAttach.VolumeName, volumeToAttach.NodeName)
}
continue
}

} else {

// Don't even try to start an operation if there is already one running for the given volume
if rc.attacherDetacher.IsOperationPending(volumeToAttach.VolumeName, "" /* podName */, "" /* nodeName */) {
if klog.V(10) {
klog.Infof("Operation for volume %q is already running. Can't start attach for %q", volumeToAttach.VolumeName, volumeToAttach.NodeName)
}
continue
}
continue
}

if rc.isMultiAttachForbidden(volumeToAttach.VolumeSpec) {
nodes := rc.actualStateOfWorld.GetNodesForAttachedVolume(volumeToAttach.VolumeName)
if len(nodes) > 0 {
if !volumeToAttach.MultiAttachErrorReported {
Expand All @@ -277,6 +263,7 @@ func (rc *reconciler) attachDesiredVolumes() {
}
continue
}

}

// Volume/Node doesn't exist, spawn a goroutine to attach it
Expand Down
4 changes: 2 additions & 2 deletions pkg/kubelet/volumemanager/reconciler/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func (rc *reconciler) reconcile() {
for _, attachedVolume := range rc.actualStateOfWorld.GetUnmountedVolumes() {
// Check IsOperationPending to avoid marking a volume as detached if it's in the process of mounting.
if !rc.desiredStateOfWorld.VolumeExists(attachedVolume.VolumeName) &&
!rc.operationExecutor.IsOperationPending(attachedVolume.VolumeName, nestedpendingoperations.EmptyUniquePodName) {
!rc.operationExecutor.IsOperationPending(attachedVolume.VolumeName, nestedpendingoperations.EmptyUniquePodName, nestedpendingoperations.EmptyNodeName) {
if attachedVolume.GloballyMounted {
// Volume is globally mounted to device, unmount it
klog.V(5).Infof(attachedVolume.GenerateMsgDetailed("Starting operationExecutor.UnmountDevice", ""))
Expand Down Expand Up @@ -405,7 +405,7 @@ func (rc *reconciler) syncStates() {
continue
}
// There is no pod that uses the volume.
if rc.operationExecutor.IsOperationPending(reconstructedVolume.volumeName, nestedpendingoperations.EmptyUniquePodName) {
if rc.operationExecutor.IsOperationPending(reconstructedVolume.volumeName, nestedpendingoperations.EmptyUniquePodName, nestedpendingoperations.EmptyNodeName) {
klog.Warning("Volume is in pending operation, skip cleaning up mounts")
}
klog.V(2).Infof(
Expand Down
2 changes: 2 additions & 0 deletions pkg/volume/util/nestedpendingoperations/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ go_library(
"//pkg/util/goroutinemap/exponentialbackoff:go_default_library",
"//pkg/volume/util/types:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/runtime:go_default_library",
"//vendor/k8s.io/klog:go_default_library",
],
Expand All @@ -27,6 +28,7 @@ go_test(
"//pkg/util/goroutinemap/exponentialbackoff:go_default_library",
"//pkg/volume/util/types:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
],
)
Expand Down

0 comments on commit dbf25c6

Please sign in to comment.