kubernetes · k8s-ci-robot · May 21, 2019 · May 9, 2019
diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go
@@ -631,16 +631,15 @@ func pastBackoffLimitOnFailure(job *batch.Job, pods []*v1.Pod) bool {
 	result := int32(0)
 	for i := range pods {
 		po := pods[i]
-		if po.Status.Phase != v1.PodRunning {
-			continue
-		}
-		for j := range po.Status.InitContainerStatuses {
-			stat := po.Status.InitContainerStatuses[j]
-			result += stat.RestartCount
-		}
-		for j := range po.Status.ContainerStatuses {
-			stat := po.Status.ContainerStatuses[j]
-			result += stat.RestartCount
+		if po.Status.Phase == v1.PodRunning || po.Status.Phase == v1.PodPending {
+			for j := range po.Status.InitContainerStatuses {
+				stat := po.Status.InitContainerStatuses[j]
+				result += stat.RestartCount
+			}
+			for j := range po.Status.ContainerStatuses {
+				stat := po.Status.ContainerStatuses[j]
+				result += stat.RestartCount
+			}
 		}
 	}
 	if *job.Spec.BackoffLimit == 0 {

diff --git a/pkg/controller/job/job_controller_test.go b/pkg/controller/job/job_controller_test.go
@@ -1414,6 +1414,7 @@ func TestJobBackoffForOnFailure(t *testing.T) {
 		// pod setup
 		jobKeyForget  bool
 		restartCounts []int32
+		podPhase      v1.PodPhase
 
 		// expectations
 		expectedActive          int32
@@ -1424,32 +1425,47 @@ func TestJobBackoffForOnFailure(t *testing.T) {
 	}{
 		"backoffLimit 0 should have 1 pod active": {
 			1, 1, 0,
-			true, []int32{0},
+			true, []int32{0}, v1.PodRunning,
 			1, 0, 0, nil, "",
 		},
 		"backoffLimit 1 with restartCount 0 should have 1 pod active": {
 			1, 1, 1,
-			true, []int32{0},
+			true, []int32{0}, v1.PodRunning,
 			1, 0, 0, nil, "",
 		},
-		"backoffLimit 1 with restartCount 1 should have 0 pod active": {
+		"backoffLimit 1 with restartCount 1 and podRunning should have 0 pod active": {
 			1, 1, 1,
-			true, []int32{1},
+			true, []int32{1}, v1.PodRunning,
 			0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
 		},
-		"too many job failures - single pod": {
+		"backoffLimit 1 with restartCount 1 and podPending should have 0 pod active": {
+			1, 1, 1,
+			true, []int32{1}, v1.PodPending,
+			0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
+		},
+		"too many job failures with podRunning - single pod": {
 			1, 5, 2,
-			true, []int32{2},
+			true, []int32{2}, v1.PodRunning,
 			0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
 		},
-		"too many job failures - multiple pods": {
+		"too many job failures with podPending - single pod": {
+			1, 5, 2,
+			true, []int32{2}, v1.PodPending,
+			0, 0, 1, &jobConditionFailed, "BackoffLimitExceeded",
+		},
+		"too many job failures with podRunning - multiple pods": {
+			2, 5, 2,
+			true, []int32{1, 1}, v1.PodRunning,
+			0, 0, 2, &jobConditionFailed, "BackoffLimitExceeded",
+		},
+		"too many job failures with podPending - multiple pods": {
 			2, 5, 2,
-			true, []int32{1, 1},
+			true, []int32{1, 1}, v1.PodPending,
 			0, 0, 2, &jobConditionFailed, "BackoffLimitExceeded",
 		},
 		"not enough failures": {
 			2, 5, 3,
-			true, []int32{1, 1},
+			true, []int32{1, 1}, v1.PodRunning,
 			2, 0, 0, nil, "",
 		},
 	}
@@ -1474,7 +1490,7 @@ func TestJobBackoffForOnFailure(t *testing.T) {
 			job.Spec.Template.Spec.RestartPolicy = v1.RestartPolicyOnFailure
 			sharedInformerFactory.Batch().V1().Jobs().Informer().GetIndexer().Add(job)
 			podIndexer := sharedInformerFactory.Core().V1().Pods().Informer().GetIndexer()
-			for i, pod := range newPodList(int32(len(tc.restartCounts)), v1.PodRunning, job) {
+			for i, pod := range newPodList(int32(len(tc.restartCounts)), tc.podPhase, job) {
 				pod.Status.ContainerStatuses = []v1.ContainerStatus{{RestartCount: tc.restartCounts[i]}}
 				podIndexer.Add(&pod)
 			}