Skip to content

Commit

Permalink
Only declare job as finished after removing all finalizers
Browse files (browse the repository at this point in the history)
Change-Id: Id4b01b0e6fabe24134e57e687356e0fc613cead4
  • Loading branch information
alculquicondor committed Jul 7, 2023
1 parent f7cd137 commit 63e217d
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 7 deletions.
11 changes: 5 additions & 6 deletions pkg/controller/job/job_controller.go
Expand Up @@ -750,11 +750,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (forget bool, rEr
var finishedCondition *batch.JobCondition

jobHasNewFailure := failed > job.Status.Failed
// new failures happen when status does not reflect the failures and active
// is different than parallelism, otherwise the previous controller loop
// failed updating status so even if we pick up failure it is not a new one
exceedsBackoffLimit := jobHasNewFailure && (active != *job.Spec.Parallelism) &&
(failed > *job.Spec.BackoffLimit)
exceedsBackoffLimit := job.Spec.BackoffLimit != nil && failed > *job.Spec.BackoffLimit

if exceedsBackoffLimit || pastBackoffLimitOnFailure(&job, pods) {
// check if the number of pod restart exceeds backoff (for restart OnFailure only)
Expand Down Expand Up @@ -1019,6 +1015,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
if cleanUncountedPodsWithoutFinalizers(&job.Status, uidsWithFinalizer) {
needsFlush = true
}
reachedMaxUncountedPods := false
for _, pod := range pods {
if !hasJobTrackingFinalizer(pod) || expectedRmFinalizers.Has(string(pod.UID)) {
continue
Expand Down Expand Up @@ -1061,6 +1058,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
//
// The job will be synced again because the Job status and Pod updates
// will put the Job back to the work queue.
reachedMaxUncountedPods = true
break
}
}
Expand All @@ -1077,7 +1075,8 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
if job, needsFlush, err = jm.flushUncountedAndRemoveFinalizers(ctx, job, podsToRemoveFinalizer, uidsWithFinalizer, &oldCounters, needsFlush); err != nil {
return err
}
if jm.enactJobFinished(job, finishedCond) {
jobFinished := !reachedMaxUncountedPods && jm.enactJobFinished(job, finishedCond)
if jobFinished {
needsFlush = true
}
if needsFlush {
Expand Down
4 changes: 3 additions & 1 deletion test/integration/job/job_test.go
Expand Up @@ -583,7 +583,9 @@ func TestOrphanPodsFinalizersClearedWithGC(t *testing.T) {

func TestFinalizersClearedWhenBackoffLimitExceeded(t *testing.T) {
defer featuregatetesting.SetFeatureGateDuringTest(t, feature.DefaultFeatureGate, features.JobTrackingWithFinalizers, true)()

// Set the maximum number of uncounted pods below the Job's parallelism, to
// ensure that the limit doesn't affect the termination of pods.
t.Cleanup(setDuringTest(&jobcontroller.MaxUncountedPods, 50))
closeFn, restConfig, clientSet, ns := setup(t, "simple")
defer closeFn()
ctx, cancel := startJobControllerAndWaitForCaches(restConfig)
Expand Down

0 comments on commit 63e217d

Please sign in to comment.