diff --git a/pkg/controller/job/job_controller.go b/pkg/controller/job/job_controller.go index 2f30d168fa2ae..cb81775f4160b 100644 --- a/pkg/controller/job/job_controller.go +++ b/pkg/controller/job/job_controller.go @@ -754,12 +754,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr var manageJobErr error var finishedCondition *batch.JobCondition - jobHasNewFailure := failed > job.Status.Failed - // new failures happen when status does not reflect the failures and active - // is different than parallelism, otherwise the previous controller loop - // failed updating status so even if we pick up failure it is not a new one - exceedsBackoffLimit := jobHasNewFailure && (active != *job.Spec.Parallelism) && - (failed > *job.Spec.BackoffLimit) + exceedsBackoffLimit := failed > *job.Spec.BackoffLimit if feature.DefaultFeatureGate.Enabled(features.JobPodFailurePolicy) { if failureTargetCondition := findConditionByType(job.Status.Conditions, batch.JobFailureTarget); failureTargetCondition != nil { @@ -1043,6 +1038,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job needsFlush = true } podFailureCountByPolicyAction := map[string]int{} + reachedMaxUncountedPods := false for _, pod := range pods { if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) { // This pod was processed in a previous sync. @@ -1107,6 +1103,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job // // The job will be synced again because the Job status and Pod updates // will put the Job back to the work queue. + reachedMaxUncountedPods = true break } } @@ -1135,7 +1132,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job if job, needsFlush, err = jm.flushUncountedAndRemoveFinalizers(ctx, job, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, podFailureCountByPolicyAction, needsFlush); err != nil { return err } - jobFinished := jm.enactJobFinished(job, finishedCond) + jobFinished := !reachedMaxUncountedPods && jm.enactJobFinished(job, finishedCond) if jobFinished { needsFlush = true } diff --git a/test/integration/job/job_test.go b/test/integration/job/job_test.go index 84a9e85a1f7e5..8ff90550a733f 100644 --- a/test/integration/job/job_test.go +++ b/test/integration/job/job_test.go @@ -1167,6 +1167,9 @@ func TestOrphanPodsFinalizersClearedWithGC(t *testing.T) { } func TestFinalizersClearedWhenBackoffLimitExceeded(t *testing.T) { + // Set a maximum number of uncounted pods below parallelism, to ensure it + // doesn't affect the termination of pods. + t.Cleanup(setDuringTest(&jobcontroller.MaxUncountedPods, 50)) closeFn, restConfig, clientSet, ns := setup(t, "simple") defer closeFn() ctx, cancel := startJobControllerAndWaitForCaches(restConfig)