Skip to content

Commit

Permalink
UPSTREAM: 95233: Add error text to kube-system wait error
Browse files Browse the repository at this point in the history
On HA API server hiccups we saw a prow job with error:

fail [k8s.io/kubernetes@v1.19.0/test/e2e/e2e.go:284]: Sep 30 17:06:08.313: Error waiting for all pods to be running and ready: 0 / 0 pods in namespace "kube-system" are NOT in RUNNING and READY state in 10m0s
POD NODE PHASE GRACE CONDITIONS

The failure should include the last error from API server, if it's
available:

fail [k8s.io/kubernetes@v1.19.0/test/e2e/e2e.go:284]: Oct  1 11:29:45.220: Error waiting for all pods to be running and ready: 0 / 0 pods in namespace "kube-system" are NOT in RUNNING and READY state in 10m0s
Last error: Get "https://localhost:6443/api/v1/namespaces/kube-system/replicationcontrollers": dial tcp [::1]:6443: connect: connection refused
POD NODE PHASE GRACE CONDITIONS
  • Loading branch information
jsafrane committed Oct 23, 2020
1 parent e67f5dc commit bd3a460
Showing 1 changed file with 11 additions and 2 deletions.
13 changes: 11 additions & 2 deletions test/e2e/framework/pod/wait.go
Expand Up @@ -67,8 +67,11 @@ const (
type podCondition func(pod *v1.Pod) (bool, error)

// errorBadPodsStates create error message of basic info of bad pods for debugging.
func errorBadPodsStates(badPods []v1.Pod, desiredPods int, ns, desiredState string, timeout time.Duration) string {
func errorBadPodsStates(badPods []v1.Pod, desiredPods int, ns, desiredState string, timeout time.Duration, err error) string {
errStr := fmt.Sprintf("%d / %d pods in namespace %q are NOT in %s state in %v\n", len(badPods), desiredPods, ns, desiredState, timeout)
if err != nil {
errStr += fmt.Sprintf("Last error: %s\n", err)
}
// Print bad pods info only if there are fewer than 10 bad pods
if len(badPods) > 10 {
return errStr + "There are too many bad pods. Please check log for details."
Expand Down Expand Up @@ -110,17 +113,21 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedN
badPods := []v1.Pod{}
desiredPods := 0
notReady := int32(0)
var lastAPIError error

if wait.PollImmediate(poll, timeout, func() (bool, error) {
// We get the new list of pods, replication controllers, and
// replica sets in every iteration because more pods come
// online during startup and we want to ensure they are also
// checked.
replicas, replicaOk := int32(0), int32(0)
// Clear API error from the last attempt in case the following calls succeed.
lastAPIError = nil

rcList, err := c.CoreV1().ReplicationControllers(ns).List(context.TODO(), metav1.ListOptions{})
if err != nil {
e2elog.Logf("Error getting replication controllers in namespace '%s': %v", ns, err)
lastAPIError = err
return false, err
}
for _, rc := range rcList.Items {
Expand All @@ -130,6 +137,7 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedN

rsList, err := c.AppsV1().ReplicaSets(ns).List(context.TODO(), metav1.ListOptions{})
if err != nil {
lastAPIError = err
e2elog.Logf("Error getting replication sets in namespace %q: %v", ns, err)
return false, err
}
Expand All @@ -140,6 +148,7 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedN

podList, err := c.CoreV1().Pods(ns).List(context.TODO(), metav1.ListOptions{})
if err != nil {
lastAPIError = err
e2elog.Logf("Error getting pods in namespace '%s': %v", ns, err)
return false, err
}
Expand Down Expand Up @@ -184,7 +193,7 @@ func WaitForPodsRunningReady(c clientset.Interface, ns string, minPods, allowedN
return false, nil
}) != nil {
if !ignoreNotReady {
return errors.New(errorBadPodsStates(badPods, desiredPods, ns, "RUNNING and READY", timeout))
return errors.New(errorBadPodsStates(badPods, desiredPods, ns, "RUNNING and READY", timeout, lastAPIError))
}
e2elog.Logf("Number of not-ready pods (%d) is below the allowed threshold (%d).", notReady, allowedNotReadyPods)
}
Expand Down

0 comments on commit bd3a460

Please sign in to comment.