Wait for terminating pods to be deleted #85226

Merged
merged 1 commit on Nov 14, 2019
61 changes: 50 additions & 11 deletions test/e2e/scheduling/framework.go
@@ -17,6 +17,7 @@ limitations under the License.
package scheduling

import (
"fmt"
"time"

"github.com/onsi/ginkgo"
@@ -29,20 +30,23 @@ import (
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
)

var (
timeout = 10 * time.Minute
waitTime = 2 * time.Second
)

// SIGDescribe annotates the test with the SIG label.
func SIGDescribe(text string, body func()) bool {
return ginkgo.Describe("[sig-scheduling] "+text, body)
}

// WaitForStableCluster waits until all existing pods are scheduled and returns their number.
func WaitForStableCluster(c clientset.Interface, masterNodes sets.String) int {
timeout := 10 * time.Minute
startTime := time.Now()

// Wait for all pods to be scheduled.
allScheduledPods, allNotScheduledPods := getFilteredPods(c, masterNodes, metav1.NamespaceAll)
allScheduledPods, allNotScheduledPods := getScheduledAndUnscheduledPods(c, masterNodes, metav1.NamespaceAll)
for len(allNotScheduledPods) != 0 {
time.Sleep(2 * time.Second)
time.Sleep(waitTime)
if startTime.Add(timeout).Before(time.Now()) {
framework.Logf("Timed out waiting for the following pods to schedule")
for _, p := range allNotScheduledPods {
@@ -51,22 +55,57 @@ func WaitForStableCluster(c clientset.Interface, masterNodes sets.String) int {
framework.Failf("Timed out after %v waiting for stable cluster.", timeout)
break
}
allScheduledPods, allNotScheduledPods = getFilteredPods(c, masterNodes, metav1.NamespaceAll)
allScheduledPods, allNotScheduledPods = getScheduledAndUnscheduledPods(c, masterNodes, metav1.NamespaceAll)
}
return len(allScheduledPods)
}

// getFilteredPods lists scheduled and not scheduled pods in the given namespace, with succeeded and failed pods filtered out.
func getFilteredPods(c clientset.Interface, masterNodes sets.String, ns string) (scheduledPods, notScheduledPods []v1.Pod) {
// WaitForPodsToBeDeleted waits until pods that are terminating are deleted.
func WaitForPodsToBeDeleted(c clientset.Interface) {
startTime := time.Now()
deleting := getDeletingPods(c, metav1.NamespaceAll)
for len(deleting) != 0 {
if startTime.Add(timeout).Before(time.Now()) {
framework.Logf("Pods still not deleted")
for _, p := range deleting {
framework.Logf("%v/%v", p.Namespace, p.Name)
}
framework.Failf("Timed out after %v waiting for pods to be deleted", timeout)
break
}
time.Sleep(waitTime)
deleting = getDeletingPods(c, metav1.NamespaceAll)
}
}

// getScheduledAndUnscheduledPods lists scheduled and not scheduled pods in the given namespace, with succeeded and failed pods filtered out.
func getScheduledAndUnscheduledPods(c clientset.Interface, masterNodes sets.String, ns string) (scheduledPods, notScheduledPods []v1.Pod) {
pods, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{})
framework.ExpectNoError(err, "listing all pods in kube-system namespace while waiting for stable cluster")
framework.ExpectNoError(err, fmt.Sprintf("listing all pods in namespace %q while waiting for stable cluster", ns))
// The API server also returns pods that succeeded or failed. We need to filter them out.
filteredPods := make([]v1.Pod, 0, len(pods.Items))
for _, pod := range pods.Items {
if pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed {
filteredPods = append(filteredPods, pod)
for _, p := range pods.Items {
if !podTerminated(p) {
filteredPods = append(filteredPods, p)
}
}
pods.Items = filteredPods
return e2epod.GetPodsScheduled(masterNodes, pods)
}

// getDeletingPods returns the pods that are marked for deletion and have not yet terminated.
func getDeletingPods(c clientset.Interface, ns string) []v1.Pod {
pods, err := c.CoreV1().Pods(ns).List(metav1.ListOptions{})
framework.ExpectNoError(err, fmt.Sprintf("listing all pods in namespace %q while waiting for pods to terminate", ns))
var deleting []v1.Pod
for _, p := range pods.Items {
if p.ObjectMeta.DeletionTimestamp != nil && !podTerminated(p) {
Review thread on the line above:

Member: Just to double-check my understanding, we could remove the `&& !podTerminated(p)` condition and we would still be OK, right?

Contributor: Could a pod have a DeletionTimestamp set and not yet be in the Succeeded/Failed state? In that case it would still affect the pod counts.

Member (author): I'm not entirely sure. I imagine different controllers (kubelet or apiserver) might update the fields or remove the pods at different times.

(A short sketch illustrating this distinction follows the framework.go diff below.)

deleting = append(deleting, p)
}
}
return deleting
}

func podTerminated(p v1.Pod) bool {
return p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed
}
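
To make the review thread above concrete, here is a small standalone sketch (not part of the PR; the pod objects are fabricated for illustration) of the per-pod check that getDeletingPods applies. A pod can carry a DeletionTimestamp while its phase is still Running, which is exactly the case the new helper waits on; a pod that has already reached Succeeded or Failed is filtered out, just as WaitForStableCluster ignores it.

```go
package main

import (
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// podTerminated mirrors the helper in the diff: a pod in a terminal phase.
func podTerminated(p v1.Pod) bool {
	return p.Status.Phase == v1.PodSucceeded || p.Status.Phase == v1.PodFailed
}

// isDeleting mirrors the per-pod condition used by getDeletingPods.
func isDeleting(p v1.Pod) bool {
	return p.ObjectMeta.DeletionTimestamp != nil && !podTerminated(p)
}

func main() {
	now := metav1.NewTime(time.Now())

	// Marked for deletion but still Running: it still affects pod counts,
	// so WaitForPodsToBeDeleted has to wait for it to disappear.
	terminating := v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "still-running", DeletionTimestamp: &now},
		Status:     v1.PodStatus{Phase: v1.PodRunning},
	}

	// Already Succeeded: filtered out everywhere, nothing to wait for.
	finished := v1.Pod{
		ObjectMeta: metav1.ObjectMeta{Name: "already-done", DeletionTimestamp: &now},
		Status:     v1.PodStatus{Phase: v1.PodSucceeded},
	}

	fmt.Println(isDeleting(terminating)) // true
	fmt.Println(isDeleting(finished))    // false
}
```
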
1 change: 1 addition & 0 deletions test/e2e/scheduling/predicates.go
@@ -129,6 +129,7 @@ var _ = SIGDescribe("SchedulerPredicates [Serial]", func() {
totalPodCapacity += podCapacity.Value()
}

WaitForPodsToBeDeleted(cs)
currentlyScheduledPods := WaitForStableCluster(cs, masterNodes)
podsNeededForSaturation := int(totalPodCapacity) - currentlyScheduledPods

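
For context on the predicates.go change, a rough sketch follows (a hypothetical helper named waitForCleanBaseline, not part of the PR) spelling out why the two waits are ordered this way: terminating pods are often still Running, so if they were counted as scheduled they would shrink podsNeededForSaturation and the test would under-fill the cluster.

```go
package scheduling

import (
	"k8s.io/apimachinery/pkg/util/sets"
	clientset "k8s.io/client-go/kubernetes"
)

// waitForCleanBaseline is a hypothetical helper (not in the PR) that captures
// the ordering used in the predicates.go hunk above.
func waitForCleanBaseline(cs clientset.Interface, masterNodes sets.String, totalPodCapacity int64) int {
	// Drain pods that are already terminating; they may still be Running and
	// would otherwise be counted as scheduled below.
	WaitForPodsToBeDeleted(cs)

	// Measure the stable baseline of pods that are scheduled and staying.
	currentlyScheduledPods := WaitForStableCluster(cs, masterNodes)

	// Capacity the test still needs to fill to saturate the cluster.
	return int(totalPodCapacity) - currentlyScheduledPods
}
```
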