diff --git a/pkg/kubelet/lifecycle/predicate.go b/pkg/kubelet/lifecycle/predicate.go
index e0d3715713c0..a5b2bbf5a0ce 100644
--- a/pkg/kubelet/lifecycle/predicate.go
+++ b/pkg/kubelet/lifecycle/predicate.go
@@ -237,7 +237,7 @@ func GeneralPredicates(pod *v1.Pod, nodeInfo *schedulerframework.NodeInfo) ([]Pr
 	}
 	if !pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, nodeInfo.Node()) {
-		reasons = append(reasons, &PredicateFailureError{nodeaffinity.Name, nodeaffinity.ErrReason})
+		reasons = append(reasons, &PredicateFailureError{nodeaffinity.Name, nodeaffinity.ErrReasonPod})
 	}
 	if !nodename.Fits(pod, nodeInfo) {
 		reasons = append(reasons, &PredicateFailureError{nodename.Name, nodename.ErrReason})
diff --git a/pkg/scheduler/apis/config/register.go b/pkg/scheduler/apis/config/register.go
index e66fd001f449..715f10a070c7 100644
--- a/pkg/scheduler/apis/config/register.go
+++ b/pkg/scheduler/apis/config/register.go
@@ -49,6 +49,7 @@ func addKnownTypes(scheme *runtime.Scheme) error {
 		&VolumeBindingArgs{},
 		&NodeResourcesLeastAllocatedArgs{},
 		&NodeResourcesMostAllocatedArgs{},
+		&NodeAffinityArgs{},
 	)
 	scheme.AddKnownTypes(schema.GroupVersion{Group: "", Version: runtime.APIVersionInternal}, &Policy{})
 	return nil
diff --git a/pkg/scheduler/apis/config/scheme/scheme_test.go b/pkg/scheduler/apis/config/scheme/scheme_test.go
index c473a9a66472..2fa682c763a6 100644
--- a/pkg/scheduler/apis/config/scheme/scheme_test.go
+++ b/pkg/scheduler/apis/config/scheme/scheme_test.go
@@ -87,6 +87,15 @@ profiles:
     - name: VolumeBinding
       args:
         bindTimeoutSeconds: 300
+    - name: NodeAffinity
+      args:
+        addedAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+            - matchExpressions:
+              - key: foo
+                operator: In
+                values: ["bar"]
 `),
 			wantProfiles: []config.KubeSchedulerProfile{
 				{
@@ -148,6 +157,26 @@ profiles:
 						BindTimeoutSeconds: 300,
 					},
 				},
+				{
+					Name: "NodeAffinity",
+					Args: &config.NodeAffinityArgs{
+						AddedAffinity: &corev1.NodeAffinity{
+							RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
+								NodeSelectorTerms: []corev1.NodeSelectorTerm{
+									{
+										MatchExpressions: []corev1.NodeSelectorRequirement{
+											{
+												Key:      "foo",
+												Operator: corev1.NodeSelectorOpIn,
+												Values:   []string{"bar"},
+											},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
 			},
 		},
 	},
@@ -271,6 +300,7 @@ profiles:
     - name: VolumeBinding
       args:
     - name: PodTopologySpread
+    - name: NodeAffinity
 `),
 			wantProfiles: []config.KubeSchedulerProfile{
 				{
@@ -315,6 +345,10 @@ profiles:
 							DefaultingType: config.SystemDefaulting,
 						},
 					},
+					{
+						Name: "NodeAffinity",
+						Args: &config.NodeAffinityArgs{},
+					},
 				},
 			},
 		},
diff --git a/pkg/scheduler/apis/config/types_pluginargs.go b/pkg/scheduler/apis/config/types_pluginargs.go
index 9e75c8ab9496..4759d8fedee0 100644
--- a/pkg/scheduler/apis/config/types_pluginargs.go
+++ b/pkg/scheduler/apis/config/types_pluginargs.go
@@ -203,3 +203,18 @@ type VolumeBindingArgs struct {
 	// If this value is nil, the default value will be used.
 	BindTimeoutSeconds int64
 }
+
+// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
+
+// NodeAffinityArgs holds arguments to configure the NodeAffinity plugin.
+type NodeAffinityArgs struct {
+	metav1.TypeMeta
+
+	// AddedAffinity is applied to all Pods, in addition to the NodeAffinity
+	// specified in the PodSpec. That is, Nodes need to satisfy AddedAffinity
+	// AND .spec.NodeAffinity. AddedAffinity is empty by default (all Nodes
+	// match).
+	// When AddedAffinity is used, some Pods with affinity requirements that match
+	// a specific Node (such as DaemonSet Pods) might remain unschedulable.
+	AddedAffinity *v1.NodeAffinity
+}
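For orientation, here is what a complete scheduler configuration using the new args could look like. This is a sketch assuming the v1beta1 API added later in this patch; the label key and values are illustrative:

```yaml
apiVersion: kubescheduler.config.k8s.io/v1beta1
kind: KubeSchedulerConfiguration
profiles:
- schedulerName: default-scheduler
  pluginConfig:
  - name: NodeAffinity
    args:
      addedAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
          - matchExpressions:
            - key: zone          # illustrative label key
              operator: In
              values: ["zone-1"] # illustrative label value
```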
diff --git a/pkg/scheduler/apis/config/v1beta1/zz_generated.conversion.go b/pkg/scheduler/apis/config/v1beta1/zz_generated.conversion.go
index bd254af06abc..1afbbfe72cff 100644
--- a/pkg/scheduler/apis/config/v1beta1/zz_generated.conversion.go
+++ b/pkg/scheduler/apis/config/v1beta1/zz_generated.conversion.go
@@ -80,6 +80,16 @@ func RegisterConversions(s *runtime.Scheme) error {
 	}); err != nil {
 		return err
 	}
+	if err := s.AddGeneratedConversionFunc((*v1beta1.NodeAffinityArgs)(nil), (*config.NodeAffinityArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
+		return Convert_v1beta1_NodeAffinityArgs_To_config_NodeAffinityArgs(a.(*v1beta1.NodeAffinityArgs), b.(*config.NodeAffinityArgs), scope)
+	}); err != nil {
+		return err
+	}
+	if err := s.AddGeneratedConversionFunc((*config.NodeAffinityArgs)(nil), (*v1beta1.NodeAffinityArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
+		return Convert_config_NodeAffinityArgs_To_v1beta1_NodeAffinityArgs(a.(*config.NodeAffinityArgs), b.(*v1beta1.NodeAffinityArgs), scope)
+	}); err != nil {
+		return err
+	}
 	if err := s.AddGeneratedConversionFunc((*v1beta1.NodeLabelArgs)(nil), (*config.NodeLabelArgs)(nil), func(a, b interface{}, scope conversion.Scope) error {
 		return Convert_v1beta1_NodeLabelArgs_To_config_NodeLabelArgs(a.(*v1beta1.NodeLabelArgs), b.(*config.NodeLabelArgs), scope)
 	}); err != nil {
@@ -480,6 +490,26 @@ func Convert_config_KubeSchedulerProfile_To_v1beta1_KubeSchedulerProfile(in *con
 	return autoConvert_config_KubeSchedulerProfile_To_v1beta1_KubeSchedulerProfile(in, out, s)
 }
 
+func autoConvert_v1beta1_NodeAffinityArgs_To_config_NodeAffinityArgs(in *v1beta1.NodeAffinityArgs, out *config.NodeAffinityArgs, s conversion.Scope) error {
+	out.AddedAffinity = (*corev1.NodeAffinity)(unsafe.Pointer(in.AddedAffinity))
+	return nil
+}
+
+// Convert_v1beta1_NodeAffinityArgs_To_config_NodeAffinityArgs is an autogenerated conversion function.
+func Convert_v1beta1_NodeAffinityArgs_To_config_NodeAffinityArgs(in *v1beta1.NodeAffinityArgs, out *config.NodeAffinityArgs, s conversion.Scope) error {
+	return autoConvert_v1beta1_NodeAffinityArgs_To_config_NodeAffinityArgs(in, out, s)
+}
+
+func autoConvert_config_NodeAffinityArgs_To_v1beta1_NodeAffinityArgs(in *config.NodeAffinityArgs, out *v1beta1.NodeAffinityArgs, s conversion.Scope) error {
+	out.AddedAffinity = (*corev1.NodeAffinity)(unsafe.Pointer(in.AddedAffinity))
+	return nil
+}
+
+// Convert_config_NodeAffinityArgs_To_v1beta1_NodeAffinityArgs is an autogenerated conversion function.
+func Convert_config_NodeAffinityArgs_To_v1beta1_NodeAffinityArgs(in *config.NodeAffinityArgs, out *v1beta1.NodeAffinityArgs, s conversion.Scope) error {
+	return autoConvert_config_NodeAffinityArgs_To_v1beta1_NodeAffinityArgs(in, out, s)
+}
+
 func autoConvert_v1beta1_NodeLabelArgs_To_config_NodeLabelArgs(in *v1beta1.NodeLabelArgs, out *config.NodeLabelArgs, s conversion.Scope) error {
 	out.PresentLabels = *(*[]string)(unsafe.Pointer(&in.PresentLabels))
 	out.AbsentLabels = *(*[]string)(unsafe.Pointer(&in.AbsentLabels))
diff --git a/pkg/scheduler/apis/config/validation/BUILD b/pkg/scheduler/apis/config/validation/BUILD
index a259578e3aca..02c2eaa18029 100644
--- a/pkg/scheduler/apis/config/validation/BUILD
+++ b/pkg/scheduler/apis/config/validation/BUILD
@@ -18,6 +18,7 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/util/validation:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/validation/field:go_default_library",
         "//staging/src/k8s.io/component-base/config/validation:go_default_library",
+        "//staging/src/k8s.io/component-helpers/scheduling/corev1/nodeaffinity:go_default_library",
        "//vendor/github.com/google/go-cmp/cmp:go_default_library",
     ],
 )
@@ -33,7 +34,10 @@ go_test(
         "//pkg/scheduler/apis/config:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/util/validation/field:go_default_library",
         "//staging/src/k8s.io/component-base/config:go_default_library",
+        "//vendor/github.com/google/go-cmp/cmp:go_default_library",
+        "//vendor/github.com/google/go-cmp/cmp/cmpopts:go_default_library",
     ],
 )
diff --git a/pkg/scheduler/apis/config/validation/validation_pluginargs.go b/pkg/scheduler/apis/config/validation/validation_pluginargs.go
index 5af444271579..b60765b6c92f 100644
--- a/pkg/scheduler/apis/config/validation/validation_pluginargs.go
+++ b/pkg/scheduler/apis/config/validation/validation_pluginargs.go
@@ -23,6 +23,7 @@ import (
 	metav1validation "k8s.io/apimachinery/pkg/apis/meta/v1/validation"
 	"k8s.io/apimachinery/pkg/util/sets"
 	"k8s.io/apimachinery/pkg/util/validation/field"
+	"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
 	"k8s.io/kubernetes/pkg/scheduler/apis/config"
 )
 
@@ -257,3 +258,29 @@ func validateResources(resources []config.ResourceSpec) error {
 	}
 	return nil
 }
+
+// ValidateNodeAffinityArgs validates that NodeAffinityArgs are correct.
+func ValidateNodeAffinityArgs(args *config.NodeAffinityArgs) error {
+	if args.AddedAffinity == nil {
+		return nil
+	}
+	affinity := args.AddedAffinity
+	f := field.NewPath("addedAffinity")
+	var allErrs field.ErrorList
+	if ns := affinity.RequiredDuringSchedulingIgnoredDuringExecution; ns != nil {
+		_, err := nodeaffinity.NewNodeSelector(ns)
+		if err != nil {
+			// TODO(#96167): Expand all field.Error(s) returned once constructors use them.
+			allErrs = append(allErrs, field.Invalid(f.Child("requiredDuringSchedulingIgnoredDuringExecution"), affinity.RequiredDuringSchedulingIgnoredDuringExecution, err.Error()))
+		}
+	}
+	// TODO: Add validation for requiredDuringSchedulingRequiredDuringExecution when it gets added to the API.
+	if terms := affinity.PreferredDuringSchedulingIgnoredDuringExecution; len(terms) != 0 {
+		_, err := nodeaffinity.NewPreferredSchedulingTerms(terms)
+		if err != nil {
+			// TODO(#96167): Expand all field.Error(s) returned once constructors use them.
+			allErrs = append(allErrs, field.Invalid(f.Child("preferredDuringSchedulingIgnoredDuringExecution"), affinity.PreferredDuringSchedulingIgnoredDuringExecution, err.Error()))
+		}
+	}
+	return allErrs.ToAggregate()
+}
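As a quick illustration of the aggregate-error behavior, a caller could exercise the new validation like this sketch (the malformed label key is made up to force a failure):

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"k8s.io/kubernetes/pkg/scheduler/apis/config"
	"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
)

func main() {
	args := &config.NodeAffinityArgs{
		AddedAffinity: &v1.NodeAffinity{
			RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
				NodeSelectorTerms: []v1.NodeSelectorTerm{{
					MatchExpressions: []v1.NodeSelectorRequirement{{
						Key:      "not a/valid/key", // malformed on purpose
						Operator: v1.NodeSelectorOpExists,
					}},
				}},
			},
		},
	}
	// A nil AddedAffinity is always valid; otherwise required and preferred
	// terms are parsed with the component-helpers constructors and any
	// failures are aggregated into a single error.
	if err := validation.ValidateNodeAffinityArgs(args); err != nil {
		fmt.Println(err)
	}
}
```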
diff --git a/pkg/scheduler/apis/config/validation/validation_pluginargs_test.go b/pkg/scheduler/apis/config/validation/validation_pluginargs_test.go
index 52fd5a177849..681b49ba1a85 100644
--- a/pkg/scheduler/apis/config/validation/validation_pluginargs_test.go
+++ b/pkg/scheduler/apis/config/validation/validation_pluginargs_test.go
@@ -19,11 +19,18 @@ package validation
 import (
 	"testing"
 
+	"github.com/google/go-cmp/cmp"
+	"github.com/google/go-cmp/cmp/cmpopts"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/validation/field"
 	"k8s.io/kubernetes/pkg/scheduler/apis/config"
 )
 
+var (
+	ignoreBadValueDetail = cmpopts.IgnoreFields(field.Error{}, "BadValue", "Detail")
+)
+
 func TestValidateDefaultPreemptionArgs(t *testing.T) {
 	cases := map[string]struct {
 		args config.DefaultPreemptionArgs
@@ -553,6 +560,98 @@ func TestValidateNodeResourcesMostAllocatedArgs(t *testing.T) {
 	}
 }
 
+func TestValidateNodeAffinityArgs(t *testing.T) {
+	cases := []struct {
+		name    string
+		args    config.NodeAffinityArgs
+		wantErr error
+	}{
+		{
+			name: "empty",
+		},
+		{
+			name: "valid added affinity",
+			args: config.NodeAffinityArgs{
+				AddedAffinity: &v1.NodeAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+						NodeSelectorTerms: []v1.NodeSelectorTerm{
+							{
+								MatchExpressions: []v1.NodeSelectorRequirement{
+									{
+										Key:      "label-1",
+										Operator: v1.NodeSelectorOpIn,
+										Values:   []string{"label-1-val"},
+									},
+								},
+							},
+						},
+					},
+					PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{
+						{
+							Weight: 1,
+							Preference: v1.NodeSelectorTerm{
+								MatchFields: []v1.NodeSelectorRequirement{
+									{
+										Key:      "metadata.name",
+										Operator: v1.NodeSelectorOpIn,
+										Values:   []string{"node-1"},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
+		{
+			name: "invalid added affinity",
+			args: config.NodeAffinityArgs{
+				AddedAffinity: &v1.NodeAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+						NodeSelectorTerms: []v1.NodeSelectorTerm{
+							{
+								MatchExpressions: []v1.NodeSelectorRequirement{
+									{
+										Key:      "invalid/label/key",
+										Operator: v1.NodeSelectorOpIn,
+										Values:   []string{"label-1-val"},
+									},
+								},
+							},
+						},
+					},
+					PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{
+						{
+							Weight: 1,
+							Preference: v1.NodeSelectorTerm{
+								MatchFields: []v1.NodeSelectorRequirement{
+									{
+										Key:      "metadata.name",
+										Operator: v1.NodeSelectorOpIn,
+										Values:   []string{"node-1", "node-2"},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			wantErr: field.ErrorList{
+				field.Invalid(field.NewPath("addedAffinity", "requiredDuringSchedulingIgnoredDuringExecution"), nil, ""),
+				field.Invalid(field.NewPath("addedAffinity", "preferredDuringSchedulingIgnoredDuringExecution"), nil, ""),
+			}.ToAggregate(),
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			err := ValidateNodeAffinityArgs(&tc.args)
+			if diff := cmp.Diff(tc.wantErr, err, ignoreBadValueDetail); diff != "" {
+				t.Fatalf("ValidateNodeAffinityArgs returned err (-want,+got):\n%s", diff)
+			}
+		})
+	}
+}
+
 func assertErr(t *testing.T, wantErr string, gotErr error) {
 	if wantErr == "" {
 		if gotErr != nil {
diff --git a/pkg/scheduler/apis/config/zz_generated.deepcopy.go b/pkg/scheduler/apis/config/zz_generated.deepcopy.go
index d102f4c5a794..bc205cfed59c 100644
--- a/pkg/scheduler/apis/config/zz_generated.deepcopy.go
+++ b/pkg/scheduler/apis/config/zz_generated.deepcopy.go
@@ -257,6 +257,36 @@ func (in *LabelsPresence) DeepCopy() *LabelsPresence {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *NodeAffinityArgs) DeepCopyInto(out *NodeAffinityArgs) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	if in.AddedAffinity != nil {
+		in, out := &in.AddedAffinity, &out.AddedAffinity
+		*out = new(v1.NodeAffinity)
+		(*in).DeepCopyInto(*out)
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeAffinityArgs.
+func (in *NodeAffinityArgs) DeepCopy() *NodeAffinityArgs {
+	if in == nil {
+		return nil
+	}
+	out := new(NodeAffinityArgs)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *NodeAffinityArgs) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *NodeLabelArgs) DeepCopyInto(out *NodeLabelArgs) {
 	*out = *in
diff --git a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
index 1d0bb339cd9a..8c07823bbb6a 100644
--- a/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
+++ b/pkg/scheduler/framework/plugins/defaultpreemption/default_preemption_test.go
@@ -1284,7 +1284,7 @@ func TestNodesWherePreemptionMightHelp(t *testing.T) {
 		{
 			name: "No node should be attempted",
 			nodesStatuses: framework.NodeToStatusMap{
-				"node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, nodeaffinity.ErrReason),
+				"node1": framework.NewStatus(framework.UnschedulableAndUnresolvable, nodeaffinity.ErrReasonPod),
 				"node2": framework.NewStatus(framework.UnschedulableAndUnresolvable, nodename.ErrReason),
 				"node3": framework.NewStatus(framework.UnschedulableAndUnresolvable, tainttoleration.ErrReasonNotMatch),
 				"node4": framework.NewStatus(framework.UnschedulableAndUnresolvable, nodelabel.ErrReasonPresenceViolated),
diff --git a/pkg/scheduler/framework/plugins/helper/node_affinity.go b/pkg/scheduler/framework/plugins/helper/node_affinity.go
index a34346183867..0f6046b83bae 100644
--- a/pkg/scheduler/framework/plugins/helper/node_affinity.go
+++ b/pkg/scheduler/framework/plugins/helper/node_affinity.go
@@ -32,44 +32,39 @@ func PodMatchesNodeSelectorAndAffinityTerms(pod *v1.Pod, node *v1.Node) bool {
 			return false
 		}
 	}
+	if pod.Spec.Affinity == nil {
+		return true
+	}
+	return NodeMatchesNodeAffinity(pod.Spec.Affinity.NodeAffinity, node)
+}
 
+// NodeMatchesNodeAffinity checks whether the Node satisfies the given NodeAffinity.
+func NodeMatchesNodeAffinity(affinity *v1.NodeAffinity, node *v1.Node) bool {
 	// 1. nil NodeSelector matches all nodes (i.e. does not filter out any nodes)
 	// 2. nil []NodeSelectorTerm (equivalent to non-nil empty NodeSelector) matches no nodes
 	// 3. zero-length non-nil []NodeSelectorTerm matches no nodes also, just for simplicity
 	// 4. nil []NodeSelectorRequirement (equivalent to non-nil empty NodeSelectorTerm) matches no nodes
 	// 5. zero-length non-nil []NodeSelectorRequirement matches no nodes also, just for simplicity
 	// 6. non-nil empty NodeSelectorRequirement is not allowed
-	nodeAffinityMatches := true
-	affinity := pod.Spec.Affinity
-	if affinity != nil && affinity.NodeAffinity != nil {
-		nodeAffinity := affinity.NodeAffinity
-		// if no required NodeAffinity requirements, will do no-op, means select all nodes.
-		// TODO: Replace next line with subsequent commented-out line when implement RequiredDuringSchedulingRequiredDuringExecution.
-		if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
-			// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution == nil && nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
-			return true
-		}
-
-		// Match node selector for requiredDuringSchedulingRequiredDuringExecution.
-		// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
-		// if nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution != nil {
-		// 	nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingRequiredDuringExecution.NodeSelectorTerms
-		// 	klog.V(10).Infof("Match for RequiredDuringSchedulingRequiredDuringExecution node selector terms %+v", nodeSelectorTerms)
-		// 	nodeAffinityMatches = nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
-		// }
-
-		// Match node selector for requiredDuringSchedulingIgnoredDuringExecution.
-		if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
-			nodeAffinityMatches = nodeAffinityMatches && nodeMatchesNodeSelectorTerms(node, nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution)
-		}
+	if affinity == nil {
+		return true
+	}
+	// Match node selector for requiredDuringSchedulingRequiredDuringExecution.
+	// TODO: Uncomment this block when implement RequiredDuringSchedulingRequiredDuringExecution.
+	// if affinity.RequiredDuringSchedulingRequiredDuringExecution != nil && !nodeMatchesNodeSelector(node, affinity.RequiredDuringSchedulingRequiredDuringExecution) {
+	// 	return false
+	// }
+	// Match node selector for requiredDuringSchedulingIgnoredDuringExecution.
+	if affinity.RequiredDuringSchedulingIgnoredDuringExecution != nil && !nodeMatchesNodeSelector(node, affinity.RequiredDuringSchedulingIgnoredDuringExecution) {
+		return false
 	}
-	return nodeAffinityMatches
+	return true
 }
 
-// nodeMatchesNodeSelectorTerms checks if a node's labels satisfy a list of node selector terms,
+// nodeMatchesNodeSelector checks if a node's labels satisfy a list of node selector terms,
 // terms are ORed, and an empty list of terms will match nothing.
-func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelector *v1.NodeSelector) bool {
+func nodeMatchesNodeSelector(node *v1.Node, nodeSelector *v1.NodeSelector) bool {
+	// TODO(#96164): parse this error earlier in the plugin so we only need to do it once per Pod.
 	matches, _ := corev1.MatchNodeSelectorTerms(node, nodeSelector)
 	return matches
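Since the helper is now exported as NodeMatchesNodeAffinity, callers outside the plugin can reuse the same matching semantics. A minimal sketch (node name and labels are illustrative):

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
)

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{
		Name:   "node-1",
		Labels: map[string]string{"zone": "us-east-1a"},
	}}
	affinity := &v1.NodeAffinity{
		RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
			NodeSelectorTerms: []v1.NodeSelectorTerm{{
				MatchExpressions: []v1.NodeSelectorRequirement{{
					Key:      "zone",
					Operator: v1.NodeSelectorOpIn,
					Values:   []string{"us-east-1a"},
				}},
			}},
		},
	}
	// A nil affinity or nil required selector matches every node.
	fmt.Println(pluginhelper.NodeMatchesNodeAffinity(affinity, node)) // true
}
```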
diff --git a/pkg/scheduler/framework/plugins/nodeaffinity/BUILD b/pkg/scheduler/framework/plugins/nodeaffinity/BUILD
index 6b7d95ee85f3..91d3fc796e4d 100644
--- a/pkg/scheduler/framework/plugins/nodeaffinity/BUILD
+++ b/pkg/scheduler/framework/plugins/nodeaffinity/BUILD
@@ -6,6 +6,8 @@ go_library(
     importpath = "k8s.io/kubernetes/pkg/scheduler/framework/plugins/nodeaffinity",
     visibility = ["//visibility:public"],
     deps = [
+        "//pkg/scheduler/apis/config:go_default_library",
+        "//pkg/scheduler/apis/config/validation:go_default_library",
         "//pkg/scheduler/framework:go_default_library",
         "//pkg/scheduler/framework/plugins/helper:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
@@ -34,10 +36,12 @@ go_test(
     embed = [":go_default_library"],
     deps = [
         "//pkg/apis/core:go_default_library",
+        "//pkg/scheduler/apis/config:go_default_library",
         "//pkg/scheduler/framework:go_default_library",
         "//pkg/scheduler/framework/runtime:go_default_library",
         "//pkg/scheduler/internal/cache:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//vendor/github.com/google/go-cmp/cmp:go_default_library",
     ],
 )
diff --git a/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go b/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go
index aad73c0526bf..15107ef76d35 100644
--- a/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go
+++ b/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity.go
@@ -23,13 +23,17 @@ import (
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
+	"k8s.io/kubernetes/pkg/scheduler/apis/config"
+	"k8s.io/kubernetes/pkg/scheduler/apis/config/validation"
 	"k8s.io/kubernetes/pkg/scheduler/framework"
 	pluginhelper "k8s.io/kubernetes/pkg/scheduler/framework/plugins/helper"
 )
 
 // NodeAffinity is a plugin that checks if a pod node selector matches the node label.
 type NodeAffinity struct {
-	handle framework.Handle
+	handle              framework.Handle
+	addedNodeSelector   *nodeaffinity.NodeSelector
+	addedPrefSchedTerms *nodeaffinity.PreferredSchedulingTerms
 }
 
 var _ framework.FilterPlugin = &NodeAffinity{}
@@ -39,8 +43,11 @@ const (
 	// Name is the name of the plugin used in the plugin registry and configurations.
 	Name = "NodeAffinity"
 
-	// ErrReason for node affinity/selector not matching.
-	ErrReason = "node(s) didn't match node selector"
+	// ErrReasonPod is the reason for Pod's node affinity/selector not matching.
+	ErrReasonPod = "node(s) didn't match Pod's node affinity"
+
+	// errReasonEnforced is the reason for added node affinity not matching.
+	errReasonEnforced = "node(s) didn't match scheduler-enforced node affinity"
 )
 
 // Name returns name of the plugin. It is used in logs, etc.
@@ -48,19 +55,25 @@ func (pl *NodeAffinity) Name() string {
 	return Name
 }
 
-// Filter invoked at the filter extension point.
+// Filter checks if the Node matches the Pod .spec.affinity.nodeAffinity and
+// the plugin's added affinity.
 func (pl *NodeAffinity) Filter(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
 	node := nodeInfo.Node()
 	if node == nil {
 		return framework.NewStatus(framework.Error, "node not found")
 	}
+	if pl.addedNodeSelector != nil && !pl.addedNodeSelector.Match(node) {
+		return framework.NewStatus(framework.UnschedulableAndUnresolvable, errReasonEnforced)
+	}
 	if !pluginhelper.PodMatchesNodeSelectorAndAffinityTerms(pod, node) {
-		return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason)
+		return framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod)
 	}
 	return nil
 }
 
-// Score invoked at the Score extension point.
+// Score returns the sum of the weights of the terms that match the Node.
+// Terms come from the Pod .spec.affinity.nodeAffinity and from the plugin's
+// added affinity.
 func (pl *NodeAffinity) Score(ctx context.Context, state *framework.CycleState, pod *v1.Pod, nodeName string) (int64, *framework.Status) {
 	nodeInfo, err := pl.handle.SnapshotSharedLister().NodeInfos().Get(nodeName)
 	if err != nil {
@@ -75,6 +88,9 @@ func (pl *NodeAffinity) Score(ctx context.Context, state *framework.CycleState,
 	affinity := pod.Spec.Affinity
 
 	var count int64
+	if pl.addedPrefSchedTerms != nil {
+		count += pl.addedPrefSchedTerms.Score(node)
+	}
 	// A nil element of PreferredDuringSchedulingIgnoredDuringExecution matches no objects.
 	// An element of PreferredDuringSchedulingIgnoredDuringExecution that refers to an
 	// empty PreferredSchedulingTerm matches all objects.
@@ -101,6 +117,36 @@ func (pl *NodeAffinity) ScoreExtensions() framework.ScoreExtensions {
 }
 
 // New initializes a new plugin and returns it.
-func New(_ runtime.Object, h framework.Handle) (framework.Plugin, error) {
-	return &NodeAffinity{handle: h}, nil
+func New(plArgs runtime.Object, h framework.Handle) (framework.Plugin, error) {
+	args, err := getArgs(plArgs)
+	if err != nil {
+		return nil, err
+	}
+	pl := &NodeAffinity{
+		handle: h,
+	}
+	if args.AddedAffinity != nil {
+		if ns := args.AddedAffinity.RequiredDuringSchedulingIgnoredDuringExecution; ns != nil {
+			pl.addedNodeSelector, err = nodeaffinity.NewNodeSelector(ns)
+			if err != nil {
+				return nil, fmt.Errorf("parsing addedAffinity.requiredDuringSchedulingIgnoredDuringExecution: %w", err)
+			}
+		}
+		// TODO: parse requiredDuringSchedulingRequiredDuringExecution when it gets added to the API.
+		if terms := args.AddedAffinity.PreferredDuringSchedulingIgnoredDuringExecution; len(terms) != 0 {
+			pl.addedPrefSchedTerms, err = nodeaffinity.NewPreferredSchedulingTerms(terms)
+			if err != nil {
+				return nil, fmt.Errorf("parsing addedAffinity.preferredDuringSchedulingIgnoredDuringExecution: %w", err)
+			}
+		}
+	}
+	return pl, nil
+}
+
+func getArgs(obj runtime.Object) (config.NodeAffinityArgs, error) {
+	ptr, ok := obj.(*config.NodeAffinityArgs)
+	if !ok {
+		return config.NodeAffinityArgs{}, fmt.Errorf("args are not of type NodeAffinityArgs, got %T", obj)
+	}
+	return *ptr, validation.ValidateNodeAffinityArgs(ptr)
+}
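The design point here is that New parses AddedAffinity once, so Filter and Score only invoke the pre-built matchers per node. A sketch of the same pattern using the component-helpers constructors directly (selector values are illustrative):

```go
package main

import (
	"fmt"
	"log"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/component-helpers/scheduling/corev1/nodeaffinity"
)

func main() {
	// Parse once, up front, like New does for addedAffinity.
	ns, err := nodeaffinity.NewNodeSelector(&v1.NodeSelector{
		NodeSelectorTerms: []v1.NodeSelectorTerm{{
			MatchExpressions: []v1.NodeSelectorRequirement{{
				Key:      "zone",
				Operator: v1.NodeSelectorOpIn,
				Values:   []string{"zone-1"},
			}},
		}},
	})
	if err != nil {
		log.Fatal(err) // malformed selectors are rejected at construction
	}
	// Match per node, like Filter does before checking the Pod's own affinity.
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{
		Name:   "node-1",
		Labels: map[string]string{"zone": "zone-1"},
	}}
	fmt.Println(ns.Match(node)) // true
}
```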
diff --git a/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity_test.go b/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity_test.go
index 68b52df48ab1..29d134ba245f 100644
--- a/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity_test.go
+++ b/pkg/scheduler/framework/plugins/nodeaffinity/node_affinity_test.go
@@ -21,9 +21,11 @@ import (
 	"reflect"
 	"testing"
 
+	"github.com/google/go-cmp/cmp"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	api "k8s.io/kubernetes/pkg/apis/core"
+	"k8s.io/kubernetes/pkg/scheduler/apis/config"
 	"k8s.io/kubernetes/pkg/scheduler/framework"
 	"k8s.io/kubernetes/pkg/scheduler/framework/runtime"
 	"k8s.io/kubernetes/pkg/scheduler/internal/cache"
@@ -37,6 +39,7 @@ func TestNodeAffinity(t *testing.T) {
 		nodeName   string
 		name       string
 		wantStatus *framework.Status
+		args       config.NodeAffinityArgs
 	}{
 		{
 			pod: &v1.Pod{},
@@ -51,7 +54,7 @@ func TestNodeAffinity(t *testing.T) {
 				},
 			},
 			name:       "missing labels",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -93,7 +96,7 @@ func TestNodeAffinity(t *testing.T) {
 				"foo": "bar",
 			},
 			name:       "node labels are subset",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -229,7 +232,7 @@ func TestNodeAffinity(t *testing.T) {
 				"foo": "bar",
 			},
 			name:       "Pod with affinity that don't match node's labels won't schedule onto the node",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -247,7 +250,7 @@ func TestNodeAffinity(t *testing.T) {
 				"foo": "bar",
 			},
 			name:       "Pod with a nil []NodeSelectorTerm in affinity, can't match the node's labels and won't schedule onto the node",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -265,7 +268,7 @@ func TestNodeAffinity(t *testing.T) {
 				"foo": "bar",
 			},
 			name:       "Pod with an empty []NodeSelectorTerm in affinity, can't match the node's labels and won't schedule onto the node",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -287,7 +290,7 @@ func TestNodeAffinity(t *testing.T) {
 				"foo": "bar",
 			},
 			name:       "Pod with empty MatchExpressions is not a valid value will match no objects and won't schedule onto the node",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{},
@@ -370,7 +373,7 @@ func TestNodeAffinity(t *testing.T) {
 				"GPU": "NVIDIA-GRID-K1",
 			},
 			name:       "Pod with multiple matchExpressions ANDed that doesn't match the existing node",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -467,7 +470,7 @@ func TestNodeAffinity(t *testing.T) {
 			},
 			name: "Pod with an Affinity matches node's labels but the PodSpec.NodeSelector(the old thing that we are deprecating) " +
 				"is not satisfied, won't schedule onto the node",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -495,7 +498,7 @@ func TestNodeAffinity(t *testing.T) {
 				"foo": "bar",
 			},
 			name:       "Pod with an invalid value in Affinity term won't be scheduled onto the node",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -546,7 +549,7 @@ func TestNodeAffinity(t *testing.T) {
 			},
 			nodeName: "node_2",
 			name:     "Pod with matchFields using In operator that does not match the existing node",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -615,7 +618,7 @@ func TestNodeAffinity(t *testing.T) {
 			nodeName: "node_2",
 			labels:   map[string]string{"foo": "bar"},
 			name:     "Pod with one term: matchFields does not match, but matchExpressions matches",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
 		},
 		{
 			pod: &v1.Pod{
@@ -684,7 +687,108 @@ func TestNodeAffinity(t *testing.T) {
 			nodeName: "node_2",
 			labels:   map[string]string{"foo": "bar"},
 			name:     "Pod with two terms: both matchFields and matchExpressions do not match",
-			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReason),
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
+		},
+		{
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					Affinity: &v1.Affinity{
+						NodeAffinity: &v1.NodeAffinity{
+							RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+								NodeSelectorTerms: []v1.NodeSelectorTerm{
+									{
+										MatchExpressions: []v1.NodeSelectorRequirement{
+											{
+												Key:      "zone",
+												Operator: v1.NodeSelectorOpIn,
+												Values:   []string{"foo"},
+											},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			nodeName: "node_2",
+			labels:   map[string]string{"zone": "foo"},
+			args: config.NodeAffinityArgs{
+				AddedAffinity: &v1.NodeAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+						NodeSelectorTerms: []v1.NodeSelectorTerm{{
+							MatchFields: []v1.NodeSelectorRequirement{{
+								Key:      api.ObjectNameField,
+								Operator: v1.NodeSelectorOpIn,
+								Values:   []string{"node_2"},
+							}},
+						}},
+					},
+				},
+			},
+			name: "Matches added affinity and Pod's node affinity",
+		},
+		{
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					Affinity: &v1.Affinity{
+						NodeAffinity: &v1.NodeAffinity{
+							RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+								NodeSelectorTerms: []v1.NodeSelectorTerm{
+									{
+										MatchExpressions: []v1.NodeSelectorRequirement{
+											{
+												Key:      "zone",
+												Operator: v1.NodeSelectorOpIn,
+												Values:   []string{"bar"},
+											},
+										},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+			nodeName: "node_2",
+			labels:   map[string]string{"zone": "foo"},
+			args: config.NodeAffinityArgs{
+				AddedAffinity: &v1.NodeAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+						NodeSelectorTerms: []v1.NodeSelectorTerm{{
+							MatchFields: []v1.NodeSelectorRequirement{{
+								Key:      api.ObjectNameField,
+								Operator: v1.NodeSelectorOpIn,
+								Values:   []string{"node_2"},
+							}},
+						}},
+					},
+				},
+			},
+			name:       "Matches added affinity but not Pod's node affinity",
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, ErrReasonPod),
+		},
+		{
+			pod:      &v1.Pod{},
+			nodeName: "node_2",
+			labels:   map[string]string{"zone": "foo"},
+			args: config.NodeAffinityArgs{
+				AddedAffinity: &v1.NodeAffinity{
+					RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
+						NodeSelectorTerms: []v1.NodeSelectorTerm{{
+							MatchExpressions: []v1.NodeSelectorRequirement{
+								{
+									Key:      "zone",
+									Operator: v1.NodeSelectorOpIn,
+									Values:   []string{"bar"},
+								},
+							},
+						}},
+					},
+				},
+			},
+			name:       "Doesn't match added affinity",
+			wantStatus: framework.NewStatus(framework.UnschedulableAndUnresolvable, errReasonEnforced),
 		},
 	}
 
@@ -697,7 +801,10 @@ func TestNodeAffinity(t *testing.T) {
 			nodeInfo := framework.NewNodeInfo()
 			nodeInfo.SetNode(&node)
 
-			p, _ := New(nil, nil)
+			p, err := New(&test.args, nil)
+			if err != nil {
+				t.Fatalf("Creating plugin: %v", err)
+			}
 			gotStatus := p.(framework.FilterPlugin).Filter(context.Background(), nil, test.pod, nodeInfo)
 			if !reflect.DeepEqual(gotStatus, test.wantStatus) {
 				t.Errorf("status does not match: %v, want: %v", gotStatus, test.wantStatus)
@@ -786,6 +893,7 @@ func TestNodeAffinityPriority(t *testing.T) {
 		nodes        []*v1.Node
 		expectedList framework.NodeScoreList
 		name         string
+		args         config.NodeAffinityArgs
 	}{
 		{
 			pod: &v1.Pod{
@@ -843,6 +951,50 @@ func TestNodeAffinityPriority(t *testing.T) {
 			expectedList: []framework.NodeScore{{Name: "machine1", Score: 18}, {Name: "machine5", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 36}},
 			name:         "all machines matches the preferred scheduling requirements of pod but with different priorities ",
 		},
+		{
+			pod: &v1.Pod{},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
+			},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: framework.MaxNodeScore}, {Name: "machine2", Score: 0}},
+			name:         "added affinity",
+			args: config.NodeAffinityArgs{
+				AddedAffinity: affinity1.NodeAffinity,
+			},
+		},
+		{
+			pod: &v1.Pod{
+				Spec: v1.PodSpec{
+					Affinity: affinity1,
+				},
+			},
+			nodes: []*v1.Node{
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine1", Labels: label1}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine2", Labels: label2}},
+				{ObjectMeta: metav1.ObjectMeta{Name: "machine3", Labels: label5}},
+			},
+			expectedList: []framework.NodeScore{{Name: "machine1", Score: 40}, {Name: "machine2", Score: 60}, {Name: "machine3", Score: framework.MaxNodeScore}},
+			name:         "added affinity and pod has default affinity",
+			args: config.NodeAffinityArgs{
+				AddedAffinity: &v1.NodeAffinity{
+					PreferredDuringSchedulingIgnoredDuringExecution: []v1.PreferredSchedulingTerm{
+						{
+							Weight: 3,
+							Preference: v1.NodeSelectorTerm{
+								MatchExpressions: []v1.NodeSelectorRequirement{
+									{
+										Key:      "key",
+										Operator: v1.NodeSelectorOpIn,
+										Values:   []string{"value"},
+									},
+								},
+							},
+						},
+					},
+				},
+			},
+		},
 	}
 
 	for _, test := range tests {
@@ -850,7 +1002,10 @@ func TestNodeAffinityPriority(t *testing.T) {
 			state := framework.NewCycleState()
 			fh, _ := runtime.NewFramework(nil, nil, nil, runtime.WithSnapshotSharedLister(cache.NewSnapshot(nil, test.nodes)))
 
-			p, _ := New(nil, fh)
+			p, err := New(&test.args, fh)
+			if err != nil {
+				t.Fatalf("Creating plugin: %v", err)
+			}
 			var gotList framework.NodeScoreList
 			for _, n := range test.nodes {
 				nodeName := n.ObjectMeta.Name
@@ -866,8 +1021,8 @@ func TestNodeAffinityPriority(t *testing.T) {
 				t.Errorf("unexpected error: %v", status)
 			}
 
-			if !reflect.DeepEqual(test.expectedList, gotList) {
-				t.Errorf("expected:\n\t%+v,\ngot:\n\t%+v", test.expectedList, gotList)
+			if diff := cmp.Diff(test.expectedList, gotList); diff != "" {
+				t.Errorf("obtained scores (-want,+got):\n%s", diff)
 			}
 		})
 	}
diff --git a/staging/src/k8s.io/kube-scheduler/config/v1beta1/register.go b/staging/src/k8s.io/kube-scheduler/config/v1beta1/register.go
index 602053ac6276..1ad5c667ee61 100644
--- a/staging/src/k8s.io/kube-scheduler/config/v1beta1/register.go
+++ b/staging/src/k8s.io/kube-scheduler/config/v1beta1/register.go
@@ -48,6 +48,7 @@ func addKnownTypes(scheme *runtime.Scheme) error {
 		&NodeResourcesLeastAllocatedArgs{},
 		&NodeResourcesMostAllocatedArgs{},
 		&VolumeBindingArgs{},
+		&NodeAffinityArgs{},
 	)
 	return nil
 }
diff --git a/staging/src/k8s.io/kube-scheduler/config/v1beta1/types_pluginargs.go b/staging/src/k8s.io/kube-scheduler/config/v1beta1/types_pluginargs.go
index 0c7d14b85ace..22d9dbb2b36f 100644
--- a/staging/src/k8s.io/kube-scheduler/config/v1beta1/types_pluginargs.go
+++ b/staging/src/k8s.io/kube-scheduler/config/v1beta1/types_pluginargs.go
@@ -215,3 +215,19 @@ type VolumeBindingArgs struct {
 	// If this value is nil, the default value (600) will be used.
 	BindTimeoutSeconds *int64 `json:"bindTimeoutSeconds,omitempty"`
 }
+
+// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
+
+// NodeAffinityArgs holds arguments to configure the NodeAffinity plugin.
+type NodeAffinityArgs struct {
+	metav1.TypeMeta `json:",inline"`
+
+	// AddedAffinity is applied to all Pods, in addition to the NodeAffinity
+	// specified in the PodSpec. That is, Nodes need to satisfy AddedAffinity
+	// AND .spec.NodeAffinity. AddedAffinity is empty by default (all Nodes
+	// match).
+	// When AddedAffinity is used, some Pods with affinity requirements that match
+	// a specific Node (such as DaemonSet Pods) might remain unschedulable.
+	// +optional
+	AddedAffinity *corev1.NodeAffinity `json:"addedAffinity,omitempty"`
+}
diff --git a/staging/src/k8s.io/kube-scheduler/config/v1beta1/zz_generated.deepcopy.go b/staging/src/k8s.io/kube-scheduler/config/v1beta1/zz_generated.deepcopy.go
index 00d64b2ff096..4aeaf44d8c3c 100644
--- a/staging/src/k8s.io/kube-scheduler/config/v1beta1/zz_generated.deepcopy.go
+++ b/staging/src/k8s.io/kube-scheduler/config/v1beta1/zz_generated.deepcopy.go
@@ -223,6 +223,36 @@ func (in *KubeSchedulerProfile) DeepCopy() *KubeSchedulerProfile {
 	return out
 }
 
+// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
+func (in *NodeAffinityArgs) DeepCopyInto(out *NodeAffinityArgs) {
+	*out = *in
+	out.TypeMeta = in.TypeMeta
+	if in.AddedAffinity != nil {
+		in, out := &in.AddedAffinity, &out.AddedAffinity
+		*out = new(corev1.NodeAffinity)
+		(*in).DeepCopyInto(*out)
+	}
+	return
+}
+
+// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeAffinityArgs.
+func (in *NodeAffinityArgs) DeepCopy() *NodeAffinityArgs {
+	if in == nil {
+		return nil
+	}
+	out := new(NodeAffinityArgs)
+	in.DeepCopyInto(out)
+	return out
+}
+
+// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
+func (in *NodeAffinityArgs) DeepCopyObject() runtime.Object {
+	if c := in.DeepCopy(); c != nil {
+		return c
+	}
+	return nil
+}
+
 // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
 func (in *NodeLabelArgs) DeepCopyInto(out *NodeLabelArgs) {
 	*out = *in
diff --git a/test/integration/volumescheduling/volume_binding_test.go b/test/integration/volumescheduling/volume_binding_test.go
index 558754fee7cc..2f944471b214 100644
--- a/test/integration/volumescheduling/volume_binding_test.go
+++ b/test/integration/volumescheduling/volume_binding_test.go
@@ -693,8 +693,8 @@ func TestPVAffinityConflict(t *testing.T) {
 		if strings.Compare(p.Status.Conditions[0].Reason, "Unschedulable") != 0 {
 			t.Fatalf("Failed as Pod %s reason was: %s but expected: Unschedulable", podName, p.Status.Conditions[0].Reason)
 		}
-		if !strings.Contains(p.Status.Conditions[0].Message, "node(s) didn't match node selector") || !strings.Contains(p.Status.Conditions[0].Message, "node(s) had volume node affinity conflict") {
-			t.Fatalf("Failed as Pod's %s failure message does not contain expected message: node(s) didn't match node selector, node(s) had volume node affinity conflict. Got message %q", podName, p.Status.Conditions[0].Message)
+		if !strings.Contains(p.Status.Conditions[0].Message, "node(s) didn't match Pod's node affinity") || !strings.Contains(p.Status.Conditions[0].Message, "node(s) had volume node affinity conflict") {
+			t.Fatalf("Failed as Pod's %s failure message does not contain expected message: node(s) didn't match Pod's node affinity, node(s) had volume node affinity conflict. Got message %q", podName, p.Status.Conditions[0].Message)
 		}
 		// Deleting test pod
 		if err := config.client.CoreV1().Pods(config.ns).Delete(context.TODO(), podName, metav1.DeleteOptions{}); err != nil {