From 1893f639d034b5c6621eb4784a298003771b29ab Mon Sep 17 00:00:00 2001 From: David Porter Date: Wed, 29 Mar 2023 17:14:54 -0700 Subject: [PATCH] test: Fix flake in node e2e mirror pod tests The newly added `MirrorPodWithGracePeriod when create a mirror pod and the container runtime is temporarily down during pod termination` test is currently flaking because in some cases when it is run there are other pods from other tests that are still in the process of being terminated. This results in the test failing because it asserts metrics that assume that there is only one pod running on the node. To fix the flake, prior to starting the test, verify that no pods exist in the api server other than the newly created mirror pod. Signed-off-by: David Porter --- test/e2e_node/mirror_pod_grace_period_test.go | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test/e2e_node/mirror_pod_grace_period_test.go b/test/e2e_node/mirror_pod_grace_period_test.go index ec3883b3f5ce..294b5afac996 100644 --- a/test/e2e_node/mirror_pod_grace_period_test.go +++ b/test/e2e_node/mirror_pod_grace_period_test.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "os" + "strings" "time" "github.com/onsi/ginkgo/v2" @@ -32,6 +33,7 @@ import ( "k8s.io/apimachinery/pkg/util/uuid" clientset "k8s.io/client-go/kubernetes" "k8s.io/kubernetes/test/e2e/framework" + e2epod "k8s.io/kubernetes/test/e2e/framework/pod" imageutils "k8s.io/kubernetes/test/utils/image" admissionapi "k8s.io/pod-security-admission/api" ) @@ -134,6 +136,30 @@ var _ = SIGDescribe("MirrorPodWithGracePeriod", func() { }) ginkgo.Context("and the container runtime is temporarily down during pod termination [NodeConformance] [Serial] [Disruptive]", func() { + ginkgo.BeforeEach(func(ctx context.Context) { + // Ensure that prior to the test starting, no other pods are running or in the process of being terminated other than the mirror pod. 
+ // This is necessary as the test verifies metrics that assume that there is only one pod (the static pod) being run, and all other pods have been terminated. + gomega.Eventually(ctx, func(ctx context.Context) error { + podList, err := e2epod.NewPodClient(f).List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed listing pods while waiting for all pods to be terminated: %v", err) + } + var remainingPods []string + + for _, pod := range podList.Items { + // The mirror pod is the only expected pod to be running + if pod.Name == mirrorPodName && pod.Namespace == ns { + continue + } + remainingPods = append(remainingPods, fmt.Sprintf("(%s/%s)", pod.Namespace, pod.Name)) + } + + if len(remainingPods) > 0 { + return fmt.Errorf("not all pods are terminated yet prior to starting mirror pod test: %v pods that still exist: %v", len(remainingPods), strings.Join(remainingPods, ",")) + } + return nil + }, f.Timeouts.PodDelete, f.Timeouts.Poll).Should(gomega.Succeed()) + }) ginkgo.It("the mirror pod should terminate successfully", func(ctx context.Context) { ginkgo.By("verifying the pod is described as syncing in metrics") gomega.Eventually(ctx, getKubeletMetrics, 5*time.Second, time.Second).Should(gstruct.MatchKeys(gstruct.IgnoreExtras, gstruct.Keys{