Automated cherry pick of #117073: Fix: Recreate DaemonSet pods completed with Succeeded phase (#117496)

Merged
8 changes: 8 additions & 0 deletions pkg/controller/daemon/daemon_controller.go
@@ -75,6 +75,8 @@ const (
FailedPlacementReason = "FailedPlacement"
// FailedDaemonPodReason is added to an event when the status of a Pod of a DaemonSet is 'Failed'.
FailedDaemonPodReason = "FailedDaemonPod"
// SucceededDaemonPodReason is added to an event when the status of a Pod of a DaemonSet is 'Succeeded'.
SucceededDaemonPodReason = "SucceededDaemonPod"
)

// controllerKind contains the schema.GroupVersionKind for this controller type.
@@ -816,6 +818,12 @@ func (dsc *DaemonSetsController) podsShouldBeOnNode(
// Emit an event so that it's discoverable to users.
dsc.eventRecorder.Eventf(ds, v1.EventTypeWarning, FailedDaemonPodReason, msg)
podsToDelete = append(podsToDelete, pod.Name)
} else if pod.Status.Phase == v1.PodSucceeded {
msg := fmt.Sprintf("Found succeeded daemon pod %s/%s on node %s, will try to delete it", pod.Namespace, pod.Name, node.Name)
klog.V(2).Infof(msg)
// Emit an event so that it's discoverable to users.
dsc.eventRecorder.Eventf(ds, v1.EventTypeNormal, SucceededDaemonPodReason, msg)
podsToDelete = append(podsToDelete, pod.Name)
} else {
daemonPodsRunning = append(daemonPodsRunning, pod)
}
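For readers skimming the hunk above, here is a minimal standalone sketch of the classification it implements: pods in a terminal phase (Failed, and with this change also Succeeded) are queued for deletion so the controller recreates them, while everything else still counts as running. classifyDaemonPods is a hypothetical helper written for illustration, not a function in the controller, and the sketch omits the event recording shown in the diff; only the k8s.io/api types are real.

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// classifyDaemonPods mirrors the branch structure of the hunk above: pods in
// a terminal phase are scheduled for deletion (and recreation by the
// DaemonSet controller); all other pods are treated as running.
func classifyDaemonPods(pods []*v1.Pod) (podsToDelete []string, running []*v1.Pod) {
	for _, pod := range pods {
		switch pod.Status.Phase {
		case v1.PodFailed, v1.PodSucceeded:
			// A terminal pod never becomes Ready again, so deleting it is
			// the only way to get a working daemon pod back on the node.
			podsToDelete = append(podsToDelete, pod.Name)
		default:
			running = append(running, pod)
		}
	}
	return podsToDelete, running
}

func main() {
	pods := []*v1.Pod{
		{ObjectMeta: metav1.ObjectMeta{Name: "ds-a"}, Status: v1.PodStatus{Phase: v1.PodRunning}},
		{ObjectMeta: metav1.ObjectMeta{Name: "ds-b"}, Status: v1.PodStatus{Phase: v1.PodSucceeded}},
		{ObjectMeta: metav1.ObjectMeta{Name: "ds-c"}, Status: v1.PodStatus{Phase: v1.PodFailed}},
	}
	toDelete, running := classifyDaemonPods(pods)
	fmt.Printf("delete: %v, still running: %d\n", toDelete, len(running))
	// prints: delete: [ds-b ds-c], still running: 1
}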
79 changes: 79 additions & 0 deletions test/integration/daemonset/daemonset_test.go
@@ -310,6 +310,41 @@ func validateDaemonSetPodsAndMarkReady(
}
}

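// validateDaemonSetPodsActive polls until exactly numberPods DaemonSet-owned
// pods are active (Running or Pending), verifying each pod's controller
// reference along the way.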
func validateDaemonSetPodsActive(
podClient corev1client.PodInterface,
podInformer cache.SharedIndexInformer,
numberPods int,
t *testing.T,
) {
if err := wait.Poll(time.Second, 60*time.Second, func() (bool, error) {
objects := podInformer.GetIndexer().List()
if len(objects) < numberPods {
return false, nil
}
podsActiveCount := 0
for _, object := range objects {
pod := object.(*v1.Pod)
ownerReferences := pod.ObjectMeta.OwnerReferences
if len(ownerReferences) != 1 {
return false, fmt.Errorf("Pod %s has %d OwnerReferences, expected only 1", pod.Name, len(ownerReferences))
}
controllerRef := ownerReferences[0]
if got, want := controllerRef.Kind, "DaemonSet"; got != want {
t.Errorf("controllerRef.Kind = %q, want %q", got, want)
}
if controllerRef.Controller == nil || *controllerRef.Controller != true {
t.Errorf("controllerRef.Controller is not set to true")
}
if pod.Status.Phase == v1.PodRunning || pod.Status.Phase == v1.PodPending {
podsActiveCount += 1
}
}
return podsActiveCount == numberPods, nil
}); err != nil {
t.Fatal(err)
}
}

// podUnschedulable returns a condition function that returns true if the given pod
// gets unschedulable status.
func podUnschedulable(c clientset.Interface, podNamespace, podName string) wait.ConditionFunc {
@@ -496,6 +531,50 @@ func TestSimpleDaemonSetLaunchesPods(t *testing.T) {
})
}

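// TestSimpleDaemonSetRestartsPodsOnTerminalPhase verifies that a daemon pod
// forced into a terminal phase (Succeeded or Failed) is deleted and replaced,
// leaving the expected number of active pods on the cluster's nodes.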
func TestSimpleDaemonSetRestartsPodsOnTerminalPhase(t *testing.T) {
for _, podPhase := range []v1.PodPhase{v1.PodSucceeded, v1.PodFailed} {
t.Run(string(podPhase), func(tt *testing.T) {
forEachStrategy(tt, func(t *testing.T, strategy *apps.DaemonSetUpdateStrategy) {
ctx, closeFn, dc, informers, clientset := setup(t)
defer closeFn()
ns := framework.CreateNamespaceOrDie(clientset, "daemonset-restart-terminal-pod-test", t)
defer framework.DeleteNamespaceOrDie(clientset, ns, t)

dsClient := clientset.AppsV1().DaemonSets(ns.Name)
podClient := clientset.CoreV1().Pods(ns.Name)
nodeClient := clientset.CoreV1().Nodes()
podInformer := informers.Core().V1().Pods().Informer()

informers.Start(ctx.Done())
go dc.Run(ctx, 2)

ds := newDaemonSet("restart-terminal-pod", ns.Name)
ds.Spec.UpdateStrategy = *strategy
if _, err := dsClient.Create(ctx, ds, metav1.CreateOptions{}); err != nil {
t.Fatalf("Failed to create DaemonSet: %v", err)
}
defer cleanupDaemonSets(t, clientset, ds)

numNodes := 3
addNodes(nodeClient, 0, numNodes, nil, t)

validateDaemonSetPodsAndMarkReady(podClient, podInformer, numNodes, t)
validateDaemonSetStatus(dsClient, ds.Name, int32(numNodes), t)
podToMarkAsTerminal := podInformer.GetIndexer().List()[0].(*v1.Pod)
podCopy := podToMarkAsTerminal.DeepCopy()
podCopy.Status.Phase = podPhase
if _, err := podClient.UpdateStatus(ctx, podCopy, metav1.UpdateOptions{}); err != nil {
t.Fatalf("Failed to mark the pod as terminal with phase: %v. Error: %v", podPhase, err)
}
// Verify that all pods are active: existing pods keep Running, and the
// recreated pod is Pending until it starts.
validateDaemonSetPodsActive(podClient, podInformer, numNodes, t)
validateDaemonSetPodsAndMarkReady(podClient, podInformer, numNodes, t)
validateDaemonSetStatus(dsClient, ds.Name, int32(numNodes), t)
})
})
}
}

func TestDaemonSetWithNodeSelectorLaunchesPods(t *testing.T) {
forEachStrategy(t, func(t *testing.T, strategy *apps.DaemonSetUpdateStrategy) {
ctx, closeFn, dc, informers, clientset := setup(t)