Automated cherry pick of #119317: change rolling update logic to exclude sunsetting nodes #120786

Merged
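At its core, the change below counts pod availability on every node, including nodes the DaemonSet is sunsetting because they no longer match its scheduling constraints, and only deletes pods on sunsetting nodes once there is a surplus of available pods over the desired number of scheduled pods. The following is a minimal standalone sketch of that arithmetic, assuming simplified inputs; planSunsetDeletions is a hypothetical helper used only for illustration, not a function in the controller — the real logic is in the update.go diff below.

package main

import "fmt"

// planSunsetDeletions mirrors the deletion rule in the diff: pods on sunsetting
// nodes may only be deleted while the number of available pods exceeds the
// desired number of scheduled pods, so availability never drops below the
// desired count.
func planSunsetDeletions(numAvailable, desiredNumberScheduled int, shouldNotRunPodsToDelete []string) []string {
	deletable := numAvailable - desiredNumberScheduled
	if deletable <= 0 {
		// No surplus yet; keep the pods on sunsetting nodes until replacements become available.
		return nil
	}
	if deletable > len(shouldNotRunPodsToDelete) {
		deletable = len(shouldNotRunPodsToDelete)
	}
	return shouldNotRunPodsToDelete[:deletable]
}

func main() {
	// Five pods are still available, but only four nodes should keep running the
	// DaemonSet after a scheduling-constraint change, so one sunsetting-node pod may go.
	fmt.Println(planSunsetDeletions(5, 4, []string{"pod-on-node-0"}))
}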
73 changes: 63 additions & 10 deletions pkg/controller/daemon/update.go
@@ -47,7 +47,7 @@ func (dsc *DaemonSetsController) rollingUpdate(ctx context.Context, ds *apps.Dae
if err != nil {
return fmt.Errorf("couldn't get node to daemon pod mapping for daemon set %q: %v", ds.Name, err)
}
maxSurge, maxUnavailable, err := dsc.updatedDesiredNodeCounts(ctx, ds, nodeList, nodeToDaemonPods)
maxSurge, maxUnavailable, desiredNumberScheduled, err := dsc.updatedDesiredNodeCounts(ctx, ds, nodeList, nodeToDaemonPods)
if err != nil {
return fmt.Errorf("couldn't get unavailable numbers: %v", err)
}
@@ -140,10 +140,12 @@ func (dsc *DaemonSetsController) rollingUpdate(ctx context.Context, ds *apps.Dae
// * An old available pod is deleted if a new pod is available
// * No more than maxSurge new pods are created for old available pods at any one time
//
var oldPodsToDelete []string
var oldPodsToDelete []string // these pods are already updated or are unavailable on sunsetted nodes
var shouldNotRunPodsToDelete []string // candidate pods to be deleted on sunsetted nodes
var candidateNewNodes []string
var allowedNewNodes []string
var numSurge int
var numAvailable int

for nodeName, pods := range nodeToDaemonPods {
newPod, oldPod, ok := findUpdatedPodsOnNode(ds, pods, hash)
@@ -153,24 +155,54 @@ func (dsc *DaemonSetsController) rollingUpdate(ctx context.Context, ds *apps.Dae
numSurge++
continue
}

// first count availability for all the nodes (even the ones that we are sunsetting due to scheduling constraints)
if oldPod != nil {
if podutil.IsPodAvailable(oldPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}) {
numAvailable++
}
} else if newPod != nil {
if podutil.IsPodAvailable(newPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}) {
numAvailable++
}
}

switch {
case oldPod == nil:
// we don't need to do anything to this node, the manage loop will handle it
case newPod == nil:
// this is a surge candidate
switch {
case !podutil.IsPodAvailable(oldPod, ds.Spec.MinReadySeconds, metav1.Time{Time: now}):
node, err := dsc.nodeLister.Get(nodeName)
if err != nil {
return fmt.Errorf("couldn't get node for nodeName %q: %v", nodeName, err)
}
if shouldRun, _ := NodeShouldRunDaemonPod(node, ds); !shouldRun {
logger.V(5).Info("DaemonSet pod on node is not available and does not match scheduling constraints, remove old pod", "daemonset", klog.KObj(ds), "node", nodeName, "oldPod", klog.KObj(oldPod))
oldPodsToDelete = append(oldPodsToDelete, oldPod.Name)
continue
}
// the old pod isn't available, allow it to become a replacement
logger.V(5).Info("Pod on node is out of date and not available, allowing replacement", "daemonset", klog.KObj(ds), "pod", klog.KObj(oldPod), "node", klog.KRef("", nodeName))
// record the replacement
if allowedNewNodes == nil {
allowedNewNodes = make([]string, 0, len(nodeToDaemonPods))
}
allowedNewNodes = append(allowedNewNodes, nodeName)
case numSurge >= maxSurge:
// no point considering any other candidates
continue
default:
node, err := dsc.nodeLister.Get(nodeName)
if err != nil {
return fmt.Errorf("couldn't get node for nodeName %q: %v", nodeName, err)
}
if shouldRun, _ := NodeShouldRunDaemonPod(node, ds); !shouldRun {
shouldNotRunPodsToDelete = append(shouldNotRunPodsToDelete, oldPod.Name)
continue
}
if numSurge >= maxSurge {
// no point considering any other candidates
continue
}
logger.V(5).Info("DaemonSet pod on node is out of date, this is a surge candidate", "daemonset", klog.KObj(ds), "pod", klog.KObj(oldPod), "node", klog.KRef("", nodeName))
// record the candidate
if candidateNewNodes == nil {
Expand All @@ -194,6 +226,27 @@ func (dsc *DaemonSetsController) rollingUpdate(ctx context.Context, ds *apps.Dae
// use any of the candidates we can, including the allowedNewNodes
logger.V(5).Info("DaemonSet allowing replacements", "daemonset", klog.KObj(ds), "replacements", len(allowedNewNodes), "maxSurge", maxSurge, "numSurge", numSurge, "candidates", len(candidateNewNodes))
remainingSurge := maxSurge - numSurge

// With maxSurge, the application owner expects 100% availability.
// When the scheduling constraints change from node A to node B, we do not want the application to be left
// without any available pods. Only delete a pod on node A when a pod on node B becomes available.
if deletablePodsNumber := numAvailable - desiredNumberScheduled; deletablePodsNumber > 0 {
if shouldNotRunPodsToDeleteNumber := len(shouldNotRunPodsToDelete); deletablePodsNumber > shouldNotRunPodsToDeleteNumber {
deletablePodsNumber = shouldNotRunPodsToDeleteNumber
}
for _, podToDeleteName := range shouldNotRunPodsToDelete[:deletablePodsNumber] {
podToDelete, err := dsc.podLister.Pods(ds.Namespace).Get(podToDeleteName)
if err != nil {
if errors.IsNotFound(err) {
continue
}
return fmt.Errorf("couldn't get pod which should be deleted due to scheduling constraints %q: %v", podToDeleteName, err)
}
logger.V(5).Info("DaemonSet pod on node should be deleted due to scheduling constraints", "daemonset", klog.KObj(ds), "pod", klog.KObj(podToDelete), "node", podToDelete.Spec.NodeName)
oldPodsToDelete = append(oldPodsToDelete, podToDeleteName)
}
}

if remainingSurge < 0 {
remainingSurge = 0
}
@@ -525,9 +578,9 @@ func (dsc *DaemonSetsController) snapshot(ctx context.Context, ds *apps.DaemonSe
return history, err
}

// updatedDesiredNodeCounts calculates the true number of allowed unavailable or surge pods and
// updatedDesiredNodeCounts calculates the true number of allowed surge pods, allowed unavailable pods, and desired scheduled pods, and
// updates the nodeToDaemonPods array to include an empty array for every node that is not scheduled.
func (dsc *DaemonSetsController) updatedDesiredNodeCounts(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, nodeToDaemonPods map[string][]*v1.Pod) (int, int, error) {
func (dsc *DaemonSetsController) updatedDesiredNodeCounts(ctx context.Context, ds *apps.DaemonSet, nodeList []*v1.Node, nodeToDaemonPods map[string][]*v1.Pod) (int, int, int, error) {
var desiredNumberScheduled int
logger := klog.FromContext(ctx)
for i := range nodeList {
@@ -545,12 +598,12 @@ func (dsc *DaemonSetsController) updatedDesiredNodeCounts(ctx context.Context, d

maxUnavailable, err := util.UnavailableCount(ds, desiredNumberScheduled)
if err != nil {
return -1, -1, fmt.Errorf("invalid value for MaxUnavailable: %v", err)
return -1, -1, -1, fmt.Errorf("invalid value for MaxUnavailable: %v", err)
}

maxSurge, err := util.SurgeCount(ds, desiredNumberScheduled)
if err != nil {
return -1, -1, fmt.Errorf("invalid value for MaxSurge: %v", err)
return -1, -1, -1, fmt.Errorf("invalid value for MaxSurge: %v", err)
}

// if the daemonset returned with an impossible configuration, obey the default of unavailable=1 (in the
@@ -560,7 +613,7 @@ func (dsc *DaemonSetsController) updatedDesiredNodeCounts(ctx context.Context, d
maxUnavailable = 1
}
logger.V(5).Info("DaemonSet with maxSurge and maxUnavailable", "daemonset", klog.KObj(ds), "maxSurge", maxSurge, "maxUnavailable", maxUnavailable)
return maxSurge, maxUnavailable, nil
return maxSurge, maxUnavailable, desiredNumberScheduled, nil
}

type historiesByRevision []*apps.ControllerRevision
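The new test added below, TestDaemonSetUpdatesPodsNotMatchTainstWithMaxSurge, exercises exactly the scenario described in the update.go comments: five nodes, a NoSchedule taint on node-0, and a DaemonSet that drops its toleration during a rolling update, which turns node-0 into a sunsetting node. A rough sketch of the first-sync arithmetic follows, using illustrative literals rather than the actual test fixtures.

package main

import "fmt"

func main() {
	// After the toleration is removed, node-0 no longer matches the scheduling
	// constraints, so only four of the five nodes are desired.
	desiredNumberScheduled := 4
	numAvailable := 5 // all five old pods were marked ready before the update
	maxSurge := 1     // the update strategy surges one new pod at a time

	// The surplus over the desired count gates deletions on the sunsetting node:
	// one surge pod is created and the pod on node-0 can be deleted right away.
	deletable := numAvailable - desiredNumberScheduled
	fmt.Println("surge pods to create:", maxSurge, "; sunsetting-node pods deletable:", deletable)
}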
136 changes: 108 additions & 28 deletions pkg/controller/daemon/update_test.go
@@ -117,7 +117,79 @@ func TestDaemonSetUpdatesPodsWithMaxSurge(t *testing.T) {
expectSyncDaemonSets(t, manager, ds, podControl, 0, 0, 0)
}

func TestDaemonSetUpdatesWhenNewPosIsNotReady(t *testing.T) {
func TestDaemonSetUpdatesPodsNotMatchTainstWithMaxSurge(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)

ds := newDaemonSet("foo")
maxSurge := 1
ds.Spec.UpdateStrategy = newUpdateSurge(intstr.FromInt(maxSurge))
tolerations := []v1.Toleration{
{Key: "node-role.kubernetes.io/control-plane", Operator: v1.TolerationOpExists},
}
setDaemonSetToleration(ds, tolerations)
manager, podControl, _, err := newTestController(ctx, ds)
if err != nil {
t.Fatalf("error creating DaemonSets controller: %v", err)
}
err = manager.dsStore.Add(ds)
if err != nil {
t.Fatal(err)
}

// Add five nodes and add a taint to one node
addNodes(manager.nodeStore, 0, 5, nil)
taints := []v1.Taint{
{Key: "node-role.kubernetes.io/control-plane", Effect: v1.TaintEffectNoSchedule},
}
node := newNode("node-0", nil)
setNodeTaint(node, taints)
err = manager.nodeStore.Update(node)
if err != nil {
t.Fatal(err)
}

// Create DaemonSet with toleration
expectSyncDaemonSets(t, manager, ds, podControl, 5, 0, 0)
markPodsReady(podControl.podStore)

// RollingUpdate DaemonSet without toleration
ds.Spec.Template.Spec.Tolerations = nil
err = manager.dsStore.Update(ds)
if err != nil {
t.Fatal(err)
}

clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, maxSurge, 1, 0)
clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, 0, 0, 0)
markPodsReady(podControl.podStore)

clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, maxSurge, maxSurge, 0)
clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, 0, 0, 0)
markPodsReady(podControl.podStore)

clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, maxSurge, maxSurge, 0)
clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, 0, 0, 0)
markPodsReady(podControl.podStore)

clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, maxSurge, maxSurge, 0)
clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, 0, 0, 0)
markPodsReady(podControl.podStore)

clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, 0, maxSurge, 0)
clearExpectations(t, manager, ds, podControl)
expectSyncDaemonSets(t, manager, ds, podControl, 0, 0, 0)
}

func TestDaemonSetUpdatesWhenNewPodIsNotReady(t *testing.T) {
_, ctx := ktesting.NewTestContext(t)
ds := newDaemonSet("foo")
manager, podControl, _, err := newTestController(ctx, ds)
@@ -379,14 +451,15 @@ func newUpdateUnavailable(value intstr.IntOrString) apps.DaemonSetUpdateStrategy

func TestGetUnavailableNumbers(t *testing.T) {
cases := []struct {
name string
ManagerFunc func(ctx context.Context) *daemonSetsController
ds *apps.DaemonSet
nodeToPods map[string][]*v1.Pod
maxSurge int
maxUnavailable int
emptyNodes int
Err error
name string
ManagerFunc func(ctx context.Context) *daemonSetsController
ds *apps.DaemonSet
nodeToPods map[string][]*v1.Pod
maxSurge int
maxUnavailable int
desiredNumberScheduled int
emptyNodes int
Err error
}{
{
name: "No nodes",
@@ -431,8 +504,9 @@ func TestGetUnavailableNumbers(t *testing.T) {
mapping["node-1"] = []*v1.Pod{pod1}
return mapping
}(),
maxUnavailable: 1,
emptyNodes: 0,
maxUnavailable: 1,
desiredNumberScheduled: 2,
emptyNodes: 0,
},
{
name: "Two nodes, one node without pods",
@@ -456,8 +530,9 @@ func TestGetUnavailableNumbers(t *testing.T) {
mapping["node-0"] = []*v1.Pod{pod0}
return mapping
}(),
maxUnavailable: 1,
emptyNodes: 1,
maxUnavailable: 1,
desiredNumberScheduled: 2,
emptyNodes: 1,
},
{
name: "Two nodes, one node without pods, surge",
@@ -481,8 +556,9 @@ func TestGetUnavailableNumbers(t *testing.T) {
mapping["node-0"] = []*v1.Pod{pod0}
return mapping
}(),
maxUnavailable: 1,
emptyNodes: 1,
maxUnavailable: 1,
desiredNumberScheduled: 2,
emptyNodes: 1,
},
{
name: "Two nodes with pods, MaxUnavailable in percents",
@@ -509,8 +585,9 @@ func TestGetUnavailableNumbers(t *testing.T) {
mapping["node-1"] = []*v1.Pod{pod1}
return mapping
}(),
maxUnavailable: 1,
emptyNodes: 0,
maxUnavailable: 1,
desiredNumberScheduled: 2,
emptyNodes: 0,
},
{
name: "Two nodes with pods, MaxUnavailable in percents, surge",
@@ -537,9 +614,10 @@ func TestGetUnavailableNumbers(t *testing.T) {
mapping["node-1"] = []*v1.Pod{pod1}
return mapping
}(),
maxSurge: 1,
maxUnavailable: 0,
emptyNodes: 0,
maxSurge: 1,
maxUnavailable: 0,
desiredNumberScheduled: 2,
emptyNodes: 0,
},
{
name: "Two nodes with pods, MaxUnavailable is 100%, surge",
@@ -566,9 +644,10 @@ func TestGetUnavailableNumbers(t *testing.T) {
mapping["node-1"] = []*v1.Pod{pod1}
return mapping
}(),
maxSurge: 2,
maxUnavailable: 0,
emptyNodes: 0,
maxSurge: 2,
maxUnavailable: 0,
desiredNumberScheduled: 2,
emptyNodes: 0,
},
{
name: "Two nodes with pods, MaxUnavailable in percents, pod terminating",
@@ -597,8 +676,9 @@ func TestGetUnavailableNumbers(t *testing.T) {
mapping["node-1"] = []*v1.Pod{pod1}
return mapping
}(),
maxUnavailable: 2,
emptyNodes: 1,
maxUnavailable: 2,
desiredNumberScheduled: 3,
emptyNodes: 1,
},
}

@@ -611,7 +691,7 @@ func TestGetUnavailableNumbers(t *testing.T) {
if err != nil {
t.Fatalf("error listing nodes: %v", err)
}
maxSurge, maxUnavailable, err := manager.updatedDesiredNodeCounts(ctx, c.ds, nodeList, c.nodeToPods)
maxSurge, maxUnavailable, desiredNumberScheduled, err := manager.updatedDesiredNodeCounts(ctx, c.ds, nodeList, c.nodeToPods)
if err != nil && c.Err != nil {
if c.Err != err {
t.Fatalf("Expected error: %v but got: %v", c.Err, err)
@@ -620,8 +700,8 @@ func TestGetUnavailableNumbers(t *testing.T) {
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if maxSurge != c.maxSurge || maxUnavailable != c.maxUnavailable {
t.Errorf("Wrong values. maxSurge: %d, expected %d, maxUnavailable: %d, expected: %d", maxSurge, c.maxSurge, maxUnavailable, c.maxUnavailable)
if maxSurge != c.maxSurge || maxUnavailable != c.maxUnavailable || desiredNumberScheduled != c.desiredNumberScheduled {
t.Errorf("Wrong values. maxSurge: %d, expected %d, maxUnavailable: %d, expected: %d, desiredNumberScheduled: %d, expected: %d", maxSurge, c.maxSurge, maxUnavailable, c.maxUnavailable, desiredNumberScheduled, c.desiredNumberScheduled)
}
var emptyNodes int
for _, pods := range c.nodeToPods {