Skip to content
Permalink
Browse files

Handle `matchFields` `nodeSelectorTerm`s in PVs

This commit handles #81725: when specifying `matchFields` in the
`nodeSelectorTerm` field of a `PersistentVolume`, scheduling of a `Pod`
using a `PersistentVolumeClaim` which should match against such
`PersistentVolume` fails when the `bindingMode` of the corresponding
`StorageClass` is `WaitForFirstConsumer`.

This is caused by the scheduler predicates (indirectly) not passing a
`Node`s 'fields' to the check for compatibility between the
`PeristentVolume` and said `Node`, hence the `matchFields` never...
'matching'.

```release-note
`PersistentVolume`s with `matchFields` in their `nodeSelectorTerm`s are
now properly handled when using `bindingMode` `WaitForFirstConsumer`.
```

/sig scheduling
/sig storage
/kind bug

See: #81725
  • Loading branch information...
NicolasT committed Aug 22, 2019
1 parent b8e8130 commit 9e651272ef10a93ee8a24a85ce26f80d2b074153
@@ -800,6 +800,10 @@ func (adc *attachDetachController) GetNodeLabels() (map[string]string, error) {
return nil, fmt.Errorf("GetNodeLabels() unsupported in Attach/Detach controller")
}

func (adc *attachDetachController) GetNodeFields() (map[string]string, error) {
return nil, fmt.Errorf("GetNodeFields() unsupported on Attach/Detach controller")
}

func (adc *attachDetachController) GetNodeName() types.NodeName {
return ""
}
@@ -417,6 +417,10 @@ func (expc *expandController) GetNodeLabels() (map[string]string, error) {
return nil, fmt.Errorf("GetNodeLabels unsupported in expandController")
}

func (expc *expandController) GetNodeFields() (map[string]string, error) {
return nil, fmt.Errorf("GetNodeFields unsupported in expandController")
}

func (expc *expandController) GetNodeName() types.NodeName {
return ""
}
@@ -224,7 +224,7 @@ func FindMatchingVolume(
if node != nil {
// Scheduler path, check that the PV NodeAffinity
// is satisfied by the node
err := volumeutil.CheckNodeAffinity(volume, node.Labels)
err := volumeutil.CheckNodeAffinityByNode(volume, node)
if err != nil {
nodeAffinityValid = false
}
@@ -124,6 +124,10 @@ func (ctrl *PersistentVolumeController) GetNodeLabels() (map[string]string, erro
return nil, fmt.Errorf("GetNodeLabels() unsupported in PersistentVolumeController")
}

func (ctrl *PersistentVolumeController) GetNodeFields() (map[string]string, error) {
return nil, fmt.Errorf("GetNodeFiels() unsupported in PersistentVolumeController")
}

func (ctrl *PersistentVolumeController) GetNodeName() types.NodeName {
return ""
}
@@ -481,7 +481,7 @@ func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claim
}

// Check PV's node affinity (the node might not have the proper label)
if err := volumeutil.CheckNodeAffinity(pv, node.Labels); err != nil {
if err := volumeutil.CheckNodeAffinityByNode(pv, node); err != nil {
return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %v", pv.Name, node.Name, err)
}

@@ -530,7 +530,7 @@ func (b *volumeBinder) checkBindings(pod *v1.Pod, bindings []*bindingInfo, claim
}
return false, fmt.Errorf("failed to get pv %q from cache: %v", pvc.Spec.VolumeName, err)
}
if err := volumeutil.CheckNodeAffinity(pv, node.Labels); err != nil {
if err := volumeutil.CheckNodeAffinityByNode(pv, node); err != nil {
return false, fmt.Errorf("pv %q node affinity doesn't match node %q: %v", pv.Name, node.Name, err)
}
}
@@ -640,7 +640,7 @@ func (b *volumeBinder) checkBoundClaims(claims []*v1.PersistentVolumeClaim, node
return false, err
}

err = volumeutil.CheckNodeAffinity(pv, node.Labels)
err = volumeutil.CheckNodeAffinityByNode(pv, node)
if err != nil {
klog.V(4).Infof("PersistentVolume %q, Node %q mismatch for Pod %q: %v", pvName, node.Name, podName, err)
return false, nil
@@ -96,6 +96,7 @@ go_library(
"//pkg/kubelet/util/queue:go_default_library",
"//pkg/kubelet/util/sliceutils:go_default_library",
"//pkg/kubelet/volumemanager:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/scheduler/algorithm/predicates:go_default_library",
"//pkg/scheduler/api:go_default_library",
"//pkg/security/apparmor:go_default_library",
@@ -40,6 +40,7 @@ import (
"k8s.io/kubernetes/pkg/kubelet/mountpod"
"k8s.io/kubernetes/pkg/kubelet/secret"
"k8s.io/kubernetes/pkg/kubelet/token"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/util"
@@ -278,6 +279,18 @@ func (kvh *kubeletVolumeHost) GetNodeLabels() (map[string]string, error) {
return node.Labels, nil
}

func (kvh *kubeletVolumeHost) GetNodeFields() (map[string]string, error) {
node, err := kvh.kubelet.GetNode()
if err != nil {
return nil, fmt.Errorf("error retrieving node: %v", err)
}
nodeFields := map[string]string{}
for k, f := range algorithm.NodeFieldSelectorKeys {
nodeFields[k] = f(node)
}
return nodeFields, nil
}

func (kvh *kubeletVolumeHost) GetNodeName() types.NodeName {
return kvh.kubelet.nodeName
}
@@ -440,6 +440,9 @@ type VolumeHost interface {
// Returns the labels on the node
GetNodeLabels() (map[string]string, error)

// Returns the fields of the node
GetNodeFields() (map[string]string, error)

// Returns the name of the node
GetNodeName() types.NodeName

@@ -240,6 +240,10 @@ func (f *fakeVolumeHost) GetNodeLabels() (map[string]string, error) {
return f.nodeLabels, nil
}

func (f *fakeVolumeHost) GetNodeFields() (map[string]string, error) {
return map[string]string{"metadata.name": f.nodeName}
}

func (f *fakeVolumeHost) GetNodeName() types.NodeName {
return types.NodeName(f.nodeName)
}
@@ -23,6 +23,7 @@ go_library(
"//pkg/api/v1/pod:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/features:go_default_library",
"//pkg/scheduler/algorithm:go_default_library",
"//pkg/util/mount:go_default_library",
"//pkg/util/resizefs:go_default_library",
"//pkg/volume:go_default_library",
@@ -34,6 +35,7 @@ go_library(
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1/unstructured:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
@@ -1759,7 +1759,11 @@ func checkNodeAffinity(og *operationGenerator, volumeToMount VolumeToMount) erro
if err != nil {
return err
}
err = util.CheckNodeAffinity(pv, nodeLabels)
nodeFields, err := og.volumePluginMgr.Host.GetNodeFields()
if err != nil {
return err
}
err = util.CheckNodeAffinity(pv, nodeLabels, nodeFields)
if err != nil {
return err
}
@@ -28,6 +28,7 @@ import (
storage "k8s.io/api/storage/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
utypes "k8s.io/apimachinery/pkg/types"
@@ -38,6 +39,7 @@ import (
"k8s.io/kubernetes/pkg/api/legacyscheme"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/pkg/scheduler/algorithm"
"k8s.io/kubernetes/pkg/util/mount"
"k8s.io/kubernetes/pkg/volume"
"k8s.io/kubernetes/pkg/volume/util/types"
@@ -158,21 +160,31 @@ func GetClassForVolume(kubeClient clientset.Interface, pv *v1.PersistentVolume)
return class, nil
}

// CheckNodeAffinityByNode looks at the PV node affinity, and checks if the node has the same corresponding labels
// This ensures that we don't mount a volume that doesn't belong on this node
func CheckNodeAffinityByNode(pv *v1.PersistentVolume, node *v1.Node) error {
nodeFields := map[string]string{}
for k, f := range algorithm.NodeFieldSelectorKeys {
nodeFields[k] = f(node)
}
return CheckNodeAffinity(pv, labels.Set(node.Labels), fields.Set(nodeFields))
}

// CheckNodeAffinity looks at the PV node affinity, and checks if the node has the same corresponding labels
// This ensures that we don't mount a volume that doesn't belong to this node
func CheckNodeAffinity(pv *v1.PersistentVolume, nodeLabels map[string]string) error {
return checkVolumeNodeAffinity(pv, nodeLabels)
func CheckNodeAffinity(pv *v1.PersistentVolume, nodeLabels labels.Set, nodeFields fields.Set) error {
return checkVolumeNodeAffinity(pv, nodeLabels, nodeFields)
}

func checkVolumeNodeAffinity(pv *v1.PersistentVolume, nodeLabels map[string]string) error {
func checkVolumeNodeAffinity(pv *v1.PersistentVolume, nodeLabels labels.Set, nodeFields fields.Set) error {
if pv.Spec.NodeAffinity == nil {
return nil
}

if pv.Spec.NodeAffinity.Required != nil {
terms := pv.Spec.NodeAffinity.Required.NodeSelectorTerms
klog.V(10).Infof("Match for Required node selector terms %+v", terms)
if !v1helper.MatchNodeSelectorTerms(terms, labels.Set(nodeLabels), nil) {
if !v1helper.MatchNodeSelectorTerms(terms, nodeLabels, nodeFields) {
return fmt.Errorf("No matching NodeSelectorTerms")
}
}

0 comments on commit 9e65127

Please sign in to comment.
You can’t perform that action at this time.