Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[API] Feature/job failure policy #48075

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 6 additions & 1 deletion api/openapi-spec/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -57557,10 +57557,15 @@
],
"properties": {
"activeDeadlineSeconds": {
"description": "Optional duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer",
"description": "Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer",
"type": "integer",
"format": "int64"
},
"backoffLimit": {
"description": "Specifies the number of retries before marking this job failed. Defaults to 6",
"type": "integer",
"format": "int32"
},
"completions": {
"description": "Specifies the desired number of successfully finished pods the job should be run with. Setting to nil means that the success of any pod signals the success of all pods, and allows parallelism to have any positive value. Setting to 1 means that parallelism is limited to 1 and the success of that pod signals the success of the job. More info: https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/",
"type": "integer",
Expand Down
7 changes: 6 additions & 1 deletion api/swagger-spec/batch_v1.json
Original file line number Diff line number Diff line change
Expand Up @@ -1385,7 +1385,12 @@
"activeDeadlineSeconds": {
"type": "integer",
"format": "int64",
"description": "Optional duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer"
"description": "Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer"
},
"backoffLimit": {
"type": "integer",
"format": "int32",
"description": "Specifies the number of retries before marking this job failed. Defaults to 6"
},
"selector": {
"$ref": "v1.LabelSelector",
Expand Down
7 changes: 6 additions & 1 deletion api/swagger-spec/batch_v1beta1.json
Original file line number Diff line number Diff line change
Expand Up @@ -1440,7 +1440,12 @@
"activeDeadlineSeconds": {
"type": "integer",
"format": "int64",
"description": "Optional duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer"
"description": "Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer"
},
"backoffLimit": {
"type": "integer",
"format": "int32",
"description": "Specifies the number of retries before marking this job failed. Defaults to 6"
},
"selector": {
"$ref": "v1.LabelSelector",
Expand Down
7 changes: 6 additions & 1 deletion api/swagger-spec/batch_v2alpha1.json
Original file line number Diff line number Diff line change
Expand Up @@ -1440,7 +1440,12 @@
"activeDeadlineSeconds": {
"type": "integer",
"format": "int64",
"description": "Optional duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer"
"description": "Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer"
},
"backoffLimit": {
"type": "integer",
"format": "int32",
"description": "Specifies the number of retries before marking this job failed. Defaults to 6"
},
"selector": {
"$ref": "v1.LabelSelector",
Expand Down
9 changes: 8 additions & 1 deletion docs/api-reference/batch/v1/definitions.html
Original file line number Diff line number Diff line change
Expand Up @@ -2396,12 +2396,19 @@ <h3 id="_v1_jobspec">v1.JobSpec</h3>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">activeDeadlineSeconds</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Optional duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">integer (int64)</p></td>
<td class="tableblock halign-left valign-top"></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">backoffLimit</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Specifies the number of retries before marking this job failed. Defaults to 6</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">integer (int32)</p></td>
<td class="tableblock halign-left valign-top"></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">selector</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">A label query over pods that should match the pod count. Normally, the system sets this field for you. More info: <a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors">https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors</a></p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
Expand Down
9 changes: 8 additions & 1 deletion docs/api-reference/batch/v1beta1/definitions.html
Original file line number Diff line number Diff line change
Expand Up @@ -2430,12 +2430,19 @@ <h3 id="_v1_jobspec">v1.JobSpec</h3>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">activeDeadlineSeconds</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Optional duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">integer (int64)</p></td>
<td class="tableblock halign-left valign-top"></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">backoffLimit</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Specifies the number of retries before marking this job failed. Defaults to 6</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">integer (int32)</p></td>
<td class="tableblock halign-left valign-top"></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">selector</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">A label query over pods that should match the pod count. Normally, the system sets this field for you. More info: <a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors">https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors</a></p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
Expand Down
9 changes: 8 additions & 1 deletion docs/api-reference/batch/v2alpha1/definitions.html
Original file line number Diff line number Diff line change
Expand Up @@ -2403,12 +2403,19 @@ <h3 id="_v1_jobspec">v1.JobSpec</h3>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">activeDeadlineSeconds</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Optional duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Specifies the duration in seconds relative to the startTime that the job may be active before the system tries to terminate it; value must be positive integer</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">integer (int64)</p></td>
<td class="tableblock halign-left valign-top"></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">backoffLimit</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">Specifies the number of retries before marking this job failed. Defaults to 6</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">integer (int32)</p></td>
<td class="tableblock halign-left valign-top"></td>
</tr>
<tr>
<td class="tableblock halign-left valign-top"><p class="tableblock">selector</p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">A label query over pods that should match the pod count. Normally, the system sets this field for you. More info: <a href="https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors">https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors</a></p></td>
<td class="tableblock halign-left valign-top"><p class="tableblock">false</p></td>
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/batch/fuzzer/fuzzer.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,10 @@ var Funcs = func(codecs runtimeserializer.CodecFactory) []interface{} {
c.FuzzNoCustom(j) // fuzz self without calling this function again
completions := int32(c.Rand.Int31())
parallelism := int32(c.Rand.Int31())
backoffLimit := int32(c.Rand.Int31())
j.Completions = &completions
j.Parallelism = &parallelism
j.BackoffLimit = &backoffLimit
if c.Rand.Int31()%2 == 0 {
j.ManualSelector = newBool(true)
} else {
Expand Down
10 changes: 10 additions & 0 deletions pkg/apis/batch/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ type JobSpec struct {
// +optional
ActiveDeadlineSeconds *int64

// Optional number of retries before marking this job failed.
// Defaults to 6
// +optional
BackoffLimit *int32

// TODO enabled it when https://github.com/kubernetes/kubernetes/issues/28486 has been fixed
// Optional number of failed pods to retain.
// +optional
// FailedPodsLimit *int32

// A label query over pods that should match the pod count.
// Normally, the system sets this field for you.
// +optional
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/batch/v1/conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func Convert_batch_JobSpec_To_v1_JobSpec(in *batch.JobSpec, out *batchv1.JobSpec
out.Parallelism = in.Parallelism
out.Completions = in.Completions
out.ActiveDeadlineSeconds = in.ActiveDeadlineSeconds
out.BackoffLimit = in.BackoffLimit
out.Selector = in.Selector
if in.ManualSelector != nil {
out.ManualSelector = new(bool)
Expand All @@ -71,6 +72,7 @@ func Convert_v1_JobSpec_To_batch_JobSpec(in *batchv1.JobSpec, out *batch.JobSpec
out.Parallelism = in.Parallelism
out.Completions = in.Completions
out.ActiveDeadlineSeconds = in.ActiveDeadlineSeconds
out.BackoffLimit = in.BackoffLimit
out.Selector = in.Selector
if in.ManualSelector != nil {
out.ManualSelector = new(bool)
Expand Down
4 changes: 4 additions & 0 deletions pkg/apis/batch/v1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ func SetDefaults_Job(obj *batchv1.Job) {
obj.Spec.Parallelism = new(int32)
*obj.Spec.Parallelism = 1
}
if obj.Spec.BackoffLimit == nil {
obj.Spec.BackoffLimit = new(int32)
*obj.Spec.BackoffLimit = 6
}
labels := obj.Spec.Template.Labels
if labels != nil && len(obj.Labels) == 0 {
obj.Labels = labels
Expand Down
105 changes: 66 additions & 39 deletions pkg/apis/batch/v1/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func TestSetDefaultJob(t *testing.T) {
expected *batchv1.Job
expectLabels bool
}{
"both unspecified -> sets both to 1": {
"All unspecified -> sets all to default values": {
original: &batchv1.Job{
Spec: batchv1.JobSpec{
Template: v1.PodTemplateSpec{
Expand All @@ -48,13 +48,14 @@ func TestSetDefaultJob(t *testing.T) {
},
expected: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(1),
Parallelism: newInt32(1),
Completions: newInt32(1),
Parallelism: newInt32(1),
BackoffLimit: newInt32(6),
},
},
expectLabels: true,
},
"both unspecified -> sets both to 1 and no default labels": {
"All unspecified -> all integers are defaulted and no default labels": {
original: &batchv1.Job{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{"mylabel": "myvalue"},
Expand All @@ -67,12 +68,13 @@ func TestSetDefaultJob(t *testing.T) {
},
expected: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(1),
Parallelism: newInt32(1),
Completions: newInt32(1),
Parallelism: newInt32(1),
BackoffLimit: newInt32(6),
},
},
},
"WQ: Parallelism explicitly 0 and completions unset -> no change": {
"WQ: Parallelism explicitly 0 and completions unset -> BackoffLimit is defaulted": {
original: &batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: newInt32(0),
Expand All @@ -83,12 +85,13 @@ func TestSetDefaultJob(t *testing.T) {
},
expected: &batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: newInt32(0),
Parallelism: newInt32(0),
BackoffLimit: newInt32(6),
},
},
expectLabels: true,
},
"WQ: Parallelism explicitly 2 and completions unset -> no change": {
"WQ: Parallelism explicitly 2 and completions unset -> BackoffLimit is defaulted": {
original: &batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: newInt32(2),
Expand All @@ -99,12 +102,13 @@ func TestSetDefaultJob(t *testing.T) {
},
expected: &batchv1.Job{
Spec: batchv1.JobSpec{
Parallelism: newInt32(2),
Parallelism: newInt32(2),
BackoffLimit: newInt32(6),
},
},
expectLabels: true,
},
"Completions explicitly 2 and parallelism unset -> parallelism is defaulted": {
"Completions explicitly 2 and others unset -> parallelism and BackoffLimit are defaulted": {
original: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(2),
Expand All @@ -115,47 +119,70 @@ func TestSetDefaultJob(t *testing.T) {
},
expected: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(2),
Parallelism: newInt32(1),
Completions: newInt32(2),
Parallelism: newInt32(1),
BackoffLimit: newInt32(6),
},
},
expectLabels: true,
},
"Both set -> no change": {
"BackoffLimit explicitly 5 and others unset -> parallelism and completions are defaulted": {
original: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(10),
Parallelism: newInt32(11),
BackoffLimit: newInt32(5),
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{Labels: defaultLabels},
},
},
},
expected: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(10),
Parallelism: newInt32(11),
Completions: newInt32(1),
Parallelism: newInt32(1),
BackoffLimit: newInt32(5),
},
},
expectLabels: true,
},
"All set -> no change": {
original: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(8),
Parallelism: newInt32(9),
BackoffLimit: newInt32(10),
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{Labels: defaultLabels},
},
},
},
expected: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(8),
Parallelism: newInt32(9),
BackoffLimit: newInt32(10),
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{Labels: defaultLabels},
},
},
},
expectLabels: true,
},
"Both set, flipped -> no change": {
"All set, flipped -> no change": {
original: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(11),
Parallelism: newInt32(10),
Completions: newInt32(11),
Parallelism: newInt32(10),
BackoffLimit: newInt32(9),
Template: v1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{Labels: defaultLabels},
},
},
},
expected: &batchv1.Job{
Spec: batchv1.JobSpec{
Completions: newInt32(11),
Parallelism: newInt32(10),
Completions: newInt32(11),
Parallelism: newInt32(10),
BackoffLimit: newInt32(9),
},
},
expectLabels: true,
Expand All @@ -171,22 +198,11 @@ func TestSetDefaultJob(t *testing.T) {
t.Errorf("%s: unexpected object: %v", name, actual)
t.FailNow()
}
if (actual.Spec.Completions == nil) != (expected.Spec.Completions == nil) {
t.Errorf("%s: got different *completions than expected: %v %v", name, actual.Spec.Completions, expected.Spec.Completions)
}
if actual.Spec.Completions != nil && expected.Spec.Completions != nil {
if *actual.Spec.Completions != *expected.Spec.Completions {
t.Errorf("%s: got different completions than expected: %d %d", name, *actual.Spec.Completions, *expected.Spec.Completions)
}
}
if (actual.Spec.Parallelism == nil) != (expected.Spec.Parallelism == nil) {
t.Errorf("%s: got different *Parallelism than expected: %v %v", name, actual.Spec.Parallelism, expected.Spec.Parallelism)
}
if actual.Spec.Parallelism != nil && expected.Spec.Parallelism != nil {
if *actual.Spec.Parallelism != *expected.Spec.Parallelism {
t.Errorf("%s: got different parallelism than expected: %d %d", name, *actual.Spec.Parallelism, *expected.Spec.Parallelism)
}
}

validateDefaultInt32(t, name, "Completions", actual.Spec.Completions, expected.Spec.Completions)
validateDefaultInt32(t, name, "Parallelism", actual.Spec.Parallelism, expected.Spec.Parallelism)
validateDefaultInt32(t, name, "BackoffLimit", actual.Spec.BackoffLimit, expected.Spec.BackoffLimit)

if test.expectLabels != reflect.DeepEqual(actual.Labels, actual.Spec.Template.Labels) {
if test.expectLabels {
t.Errorf("%s: expected: %v, got: %v", name, actual.Spec.Template.Labels, actual.Labels)
Expand All @@ -198,6 +214,17 @@ func TestSetDefaultJob(t *testing.T) {
}
}

func validateDefaultInt32(t *testing.T, name string, field string, actual *int32, expected *int32) {
if (actual == nil) != (expected == nil) {
t.Errorf("%s: got different *%s than expected: %v %v", name, field, actual, expected)
}
if actual != nil && expected != nil {
if *actual != *expected {
t.Errorf("%s: got different %s than expected: %d %d", name, field, *actual, *expected)
}
}
}

func roundTrip(t *testing.T, obj runtime.Object) runtime.Object {
data, err := runtime.Encode(api.Codecs.LegacyCodec(SchemeGroupVersion), obj)
if err != nil {
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/batch/v1/zz_generated.conversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ func autoConvert_v1_JobSpec_To_batch_JobSpec(in *v1.JobSpec, out *batch.JobSpec,
out.Parallelism = (*int32)(unsafe.Pointer(in.Parallelism))
out.Completions = (*int32)(unsafe.Pointer(in.Completions))
out.ActiveDeadlineSeconds = (*int64)(unsafe.Pointer(in.ActiveDeadlineSeconds))
out.BackoffLimit = (*int32)(unsafe.Pointer(in.BackoffLimit))
out.Selector = (*meta_v1.LabelSelector)(unsafe.Pointer(in.Selector))
out.ManualSelector = (*bool)(unsafe.Pointer(in.ManualSelector))
if err := api_v1.Convert_v1_PodTemplateSpec_To_api_PodTemplateSpec(&in.Template, &out.Template, s); err != nil {
Expand All @@ -173,6 +174,7 @@ func autoConvert_batch_JobSpec_To_v1_JobSpec(in *batch.JobSpec, out *v1.JobSpec,
out.Parallelism = (*int32)(unsafe.Pointer(in.Parallelism))
out.Completions = (*int32)(unsafe.Pointer(in.Completions))
out.ActiveDeadlineSeconds = (*int64)(unsafe.Pointer(in.ActiveDeadlineSeconds))
out.BackoffLimit = (*int32)(unsafe.Pointer(in.BackoffLimit))
out.Selector = (*meta_v1.LabelSelector)(unsafe.Pointer(in.Selector))
out.ManualSelector = (*bool)(unsafe.Pointer(in.ManualSelector))
if err := api_v1.Convert_api_PodTemplateSpec_To_v1_PodTemplateSpec(&in.Template, &out.Template, s); err != nil {
Expand Down