Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Skip check for all topology labels when using system default spreading #105046

Merged
merged 1 commit into from Sep 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/scheduler/framework/plugins/podtopologyspread/plugin.go
Expand Up @@ -53,6 +53,7 @@ var systemDefaultConstraints = []v1.TopologySpreadConstraint{

// PodTopologySpread is a plugin that ensures pod's topologySpreadConstraints is satisfied.
type PodTopologySpread struct {
systemDefaulted bool
parallelizer parallelize.Parallelizer
defaultConstraints []v1.TopologySpreadConstraint
sharedLister framework.SharedLister
Expand Down Expand Up @@ -97,6 +98,7 @@ func New(plArgs runtime.Object, h framework.Handle) (framework.Plugin, error) {
}
if args.DefaultingType == config.SystemDefaulting {
pl.defaultConstraints = systemDefaultConstraints
pl.systemDefaulted = true
}
if len(pl.defaultConstraints) != 0 {
if h.SharedInformerFactory() == nil {
Expand Down
12 changes: 8 additions & 4 deletions pkg/scheduler/framework/plugins/podtopologyspread/scoring.go
Expand Up @@ -56,7 +56,7 @@ func (s *preScoreState) Clone() framework.StateData {
// 1) s.TopologyPairToPodCounts: keyed with both eligible topology pair and node names.
// 2) s.IgnoredNodes: the set of nodes that shouldn't be scored.
// 3) s.TopologyNormalizingWeight: The weight to be given to each constraint based on the number of values in a topology.
func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, filteredNodes []*v1.Node) error {
func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, filteredNodes []*v1.Node, requireAllTopologies bool) error {
var err error
if len(pod.Spec.TopologySpreadConstraints) > 0 {
s.Constraints, err = filterTopologySpreadConstraints(pod.Spec.TopologySpreadConstraints, v1.ScheduleAnyway)
Expand All @@ -74,7 +74,7 @@ func (pl *PodTopologySpread) initPreScoreState(s *preScoreState, pod *v1.Pod, fi
}
topoSize := make([]int, len(s.Constraints))
for _, node := range filteredNodes {
if !nodeLabelsMatchSpreadConstraints(node.Labels, s.Constraints) {
if requireAllTopologies && !nodeLabelsMatchSpreadConstraints(node.Labels, s.Constraints) {
// Nodes which don't have all required topologyKeys present are ignored
// when scoring later.
s.IgnoredNodes.Insert(node.Name)
Expand Down Expand Up @@ -125,7 +125,11 @@ func (pl *PodTopologySpread) PreScore(
IgnoredNodes: sets.NewString(),
TopologyPairToPodCounts: make(map[topologyPair]*int64),
}
err = pl.initPreScoreState(state, pod, filteredNodes)
// Only require that nodes have all the topology labels if using
// non-system-default spreading rules. This allows nodes that don't have a
// zone label to still have hostname spreading.
requireAllTopologies := len(pod.Spec.TopologySpreadConstraints) > 0 || !pl.systemDefaulted
err = pl.initPreScoreState(state, pod, filteredNodes, requireAllTopologies)
if err != nil {
return framework.AsStatus(fmt.Errorf("calculating preScoreState: %w", err))
}
Expand All @@ -147,7 +151,7 @@ func (pl *PodTopologySpread) PreScore(
// (1) `node` should satisfy incoming pod's NodeSelector/NodeAffinity
// (2) All topologyKeys need to be present in `node`
match, _ := requiredNodeAffinity.Match(node)
if !match || !nodeLabelsMatchSpreadConstraints(node.Labels, state.Constraints) {
if !match || (requireAllTopologies && !nodeLabelsMatchSpreadConstraints(node.Labels, state.Constraints)) {
return
}

Expand Down
50 changes: 46 additions & 4 deletions pkg/scheduler/framework/plugins/podtopologyspread/scoring_test.go
Expand Up @@ -123,6 +123,10 @@ func TestPreScoreStateEmptyNodes(t *testing.T) {
},
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Label(v1.LabelTopologyZone, "mars").Obj(),
st.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Label(v1.LabelTopologyZone, "mars").Obj(),
// Nodes with no zone are not excluded. They are considered a separate zone.
st.MakeNode().Name("node-c").Label(v1.LabelHostname, "node-c").Obj(),
st.MakeNode().Name("node-d").Label(v1.LabelHostname, "node-d").Obj(),
},
objs: []runtime.Object{
&appsv1.ReplicaSet{ObjectMeta: metav1.ObjectMeta{Namespace: "default", Name: "rs1"}, Spec: appsv1.ReplicaSetSpec{Selector: st.MakeLabelSelector().Exists("foo").Obj()}},
Expand All @@ -143,8 +147,9 @@ func TestPreScoreStateEmptyNodes(t *testing.T) {
IgnoredNodes: sets.NewString(),
TopologyPairToPodCounts: map[topologyPair]*int64{
{key: v1.LabelTopologyZone, value: "mars"}: pointer.Int64Ptr(0),
{key: v1.LabelTopologyZone, value: ""}: pointer.Int64Ptr(0),
},
TopologyNormalizingWeight: []float64{topologyNormalizingWeight(1), topologyNormalizingWeight(1)},
TopologyNormalizingWeight: []float64{topologyNormalizingWeight(4), topologyNormalizingWeight(2)},
},
},
{
Expand Down Expand Up @@ -277,6 +282,7 @@ func TestPodTopologySpreadScore(t *testing.T) {
existingPods []*v1.Pod
nodes []*v1.Node
failedNodes []*v1.Node // nodes + failedNodes = all nodes
objs []runtime.Object
want framework.NodeScoreList
}{
// Explanation on the Legend:
Expand Down Expand Up @@ -427,6 +433,40 @@ func TestPodTopologySpreadScore(t *testing.T) {
{Name: "node-d", Score: 100},
},
},
{
name: "system defaulting, nodes don't have zone, pods match service",
pod: st.MakePod().Name("p").Label("foo", "").Obj(),
existingPods: []*v1.Pod{
// matching pods spread as 4/3/2/1.
st.MakePod().Name("p-a1").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-a2").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-a3").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-a4").Node("node-a").Label("foo", "").Obj(),
st.MakePod().Name("p-b1").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-b2").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-b3").Node("node-b").Label("foo", "").Obj(),
st.MakePod().Name("p-c1").Node("node-c").Label("foo", "").Obj(),
st.MakePod().Name("p-c2").Node("node-c").Label("foo", "").Obj(),
st.MakePod().Name("p-d1").Node("node-d").Label("foo", "").Obj(),
},
nodes: []*v1.Node{
st.MakeNode().Name("node-a").Label(v1.LabelHostname, "node-a").Obj(),
st.MakeNode().Name("node-b").Label(v1.LabelHostname, "node-b").Obj(),
st.MakeNode().Name("node-c").Label(v1.LabelHostname, "node-c").Obj(),
st.MakeNode().Name("node-d").Label(v1.LabelHostname, "node-d").Obj(),
Comment on lines +453 to +456
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For curiosity: in addition to zone label, should we consider the case that all or partial nodes don't carry standard host label?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that even possible? It seems that kubelet takes care of it https://kubernetes.io/docs/reference/labels-annotations-taints/#kubernetesiohostname

Interestingly, given that we special-case hostname to do the count in Score (instead of PreScore), we could make it work. However, I don't think we need to support that.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is that even possible?

I was just curious. Personally I'm with you to believe it's sane defaults for every cluster.

},
failedNodes: []*v1.Node{},
objs: []runtime.Object{
&v1.Service{Spec: v1.ServiceSpec{Selector: map[string]string{"foo": ""}}},
},
want: []framework.NodeScore{
// Same scores as if we were using one spreading constraint.
{Name: "node-a", Score: 33},
{Name: "node-b", Score: 55},
{Name: "node-c", Score: 77},
{Name: "node-d", Score: 100},
},
},
{
// matching pods spread as 4/2/1/~3~ (node4 is not a candidate)
name: "one constraint on node, 3 out of 4 nodes are candidates",
Expand Down Expand Up @@ -686,10 +726,12 @@ func TestPodTopologySpreadScore(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
t.Cleanup(cancel)
allNodes := append([]*v1.Node{}, tt.nodes...)
allNodes = append(allNodes, tt.failedNodes...)
state := framework.NewCycleState()
pl := plugintesting.SetupPlugin(t, New, &config.PodTopologySpreadArgs{DefaultingType: config.ListDefaulting}, cache.NewSnapshot(tt.existingPods, allNodes))
pl := plugintesting.SetupPluginWithInformers(ctx, t, New, &config.PodTopologySpreadArgs{DefaultingType: config.SystemDefaulting}, cache.NewSnapshot(tt.existingPods, allNodes), tt.objs)
p := pl.(*PodTopologySpread)

status := p.PreScore(context.Background(), state, tt.pod, tt.nodes)
Expand All @@ -700,14 +742,14 @@ func TestPodTopologySpreadScore(t *testing.T) {
var gotList framework.NodeScoreList
for _, n := range tt.nodes {
nodeName := n.Name
score, status := p.Score(context.Background(), state, tt.pod, nodeName)
score, status := p.Score(ctx, state, tt.pod, nodeName)
if !status.IsSuccess() {
t.Errorf("unexpected error: %v", status)
}
gotList = append(gotList, framework.NodeScore{Name: nodeName, Score: score})
}

status = p.NormalizeScore(context.Background(), state, tt.pod, gotList)
status = p.NormalizeScore(ctx, state, tt.pod, gotList)
if !status.IsSuccess() {
t.Errorf("unexpected error: %v", status)
}
Expand Down