Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add validation for child jobs without ownerReference #865

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions pkg/controller/jobframework/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,16 @@ func validateLabelAsCRDName(job GenericJob, crdNameLabel string) field.ErrorList
return allErrs
}

func ValidateCreateForParentWorkload(job GenericJob) field.ErrorList {
var allErrs field.ErrorList
if _, exists := job.Object().GetAnnotations()[constants.ParentWorkloadAnnotation]; exists {
if job.Object().GetOwnerReferences() == nil {
allErrs = append(allErrs, field.Forbidden(parentWorkloadKeyPath, "must not add a parent workload annotation to job without OwnerReference"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we could also check that the owner is a well-known job owner.

And maybe there should be a Kueue configuration that allows to say which other APIs are job owners (in addition to the ones implemented in-tree).
This way, it should be possible to write external integration controllers and tell kueue to trust particular owner references and not suspend the jobs.

Once we have this trust, we can potentially simplify this code and not query weather the parent workload has a queue name

if !r.manageJobsWithoutQueueName && QueueName(job) == "" {
if isStandaloneJob {
log.V(3).Info("Neither queue-name label, nor parent-workload annotation is set, ignoring the job",
"queueName", QueueName(job), "parentWorkload", ParentWorkloadName(job))
return ctrl.Result{}, nil
}
isParentJobManaged, err := r.IsParentJobManaged(ctx, job.Object(), namespacedName.Namespace)
if err != nil {
log.Error(err, "couldn't check whether the parent job is managed by kueue")
return ctrl.Result{}, err
}
if !isParentJobManaged {
log.V(3).Info("parent-workload annotation is set, and the parent job doesn't have a queue-name label, ignoring the job",
"parentWorkload", ParentWorkloadName(job))
return ctrl.Result{}, nil
}
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't see any disadvantages to trust custom jobs, so I'm ok with this validation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It sounds good. Let me try to work on the feature in follow-ups.

}
}
return allErrs
}

func ValidateUpdateForQueueName(oldJob, newJob GenericJob) field.ErrorList {
var allErrs field.ErrorList
if !newJob.IsSuspended() && (QueueName(oldJob) != QueueName(newJob)) {
Expand Down
1 change: 1 addition & 0 deletions pkg/controller/jobs/job/job_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ func validateCreate(job *Job) field.ErrorList {
allErrs = append(allErrs, jobframework.ValidateAnnotationAsCRDName(job, constants.ParentWorkloadAnnotation)...)
allErrs = append(allErrs, jobframework.ValidateCreateForQueueName(job)...)
allErrs = append(allErrs, validatePartialAdmissionCreate(job)...)
allErrs = append(allErrs, jobframework.ValidateCreateForParentWorkload(job)...)
return allErrs
}

Expand Down
47 changes: 38 additions & 9 deletions pkg/controller/jobs/job/job_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,15 +58,33 @@ func TestValidateCreate(t *testing.T) {
wantErr: nil,
},
{
name: "valid parent-workload annotation",
job: testingutil.MakeJob("job", "default").ParentWorkload("parent-workload-name").Queue("queue").Obj(),
name: "valid parent-workload annotation",
job: testingutil.MakeJob("job", "default").
ParentWorkload("parent-workload-name").
Queue("queue").
OwnerReference("parent-workload-name", kubeflow.SchemeGroupVersionKind).
Obj(),
wantErr: nil,
},
{
name: "invalid parent-workload annotation",
job: testingutil.MakeJob("job", "default").ParentWorkload("parent workload name").Queue("queue").Obj(),
name: "invalid parent-workload annotation",
job: testingutil.MakeJob("job", "default").
ParentWorkload("parent workload name").
OwnerReference("parent workload name", kubeflow.SchemeGroupVersionKind).
Queue("queue").
Obj(),
wantErr: field.ErrorList{field.Invalid(parentWorkloadKeyPath, "parent workload name", invalidRFC1123Message)},
},
{
name: "invalid parent-workload annotation (owner is missing)",
job: testingutil.MakeJob("job", "default").
ParentWorkload("parent-workload").
Queue("queue").
Obj(),
wantErr: field.ErrorList{
field.Forbidden(parentWorkloadKeyPath, "must not add a parent workload annotation to job without OwnerReference"),
},
},
{
name: "invalid queue-name label",
job: testingutil.MakeJob("job", "default").Queue("queue name").Obj(),
Expand All @@ -79,7 +97,11 @@ func TestValidateCreate(t *testing.T) {
},
{
name: "invalid queue-name and parent-workload annotation",
job: testingutil.MakeJob("job", "default").Queue("queue name").ParentWorkload("parent workload name").Obj(),
job: testingutil.MakeJob("job", "default").
Queue("queue name").
ParentWorkload("parent workload name").
OwnerReference("parent workload name", kubeflow.SchemeGroupVersionKind).
Obj(),
wantErr: field.ErrorList{
field.Invalid(parentWorkloadKeyPath, "parent workload name", invalidRFC1123Message),
field.Invalid(queueNameLabelPath, "queue name", invalidRFC1123Message),
Expand Down Expand Up @@ -173,9 +195,12 @@ func TestValidateUpdate(t *testing.T) {
wantErr: field.ErrorList{field.Invalid(queueNameLabelPath, "queue name", invalidRFC1123Message)},
},
{
name: "update the nil parent workload to non-empty",
oldJob: testingutil.MakeJob("job", "default").Obj(),
newJob: testingutil.MakeJob("job", "default").ParentWorkload("parent").Obj(),
name: "update the nil parent workload to non-empty",
oldJob: testingutil.MakeJob("job", "default").Obj(),
newJob: testingutil.MakeJob("job", "default").
ParentWorkload("parent").
OwnerReference("parent", kubeflow.SchemeGroupVersionKind).
Obj(),
wantErr: field.ErrorList{field.Forbidden(parentWorkloadKeyPath, "this annotation is immutable")},
},
{
Expand All @@ -187,7 +212,11 @@ func TestValidateUpdate(t *testing.T) {
{
name: "invalid queue name and immutable parent",
oldJob: testingutil.MakeJob("job", "default").Obj(),
newJob: testingutil.MakeJob("job", "default").Queue("queue name").ParentWorkload("parent").Obj(),
newJob: testingutil.MakeJob("job", "default").
Queue("queue name").
ParentWorkload("parent").
OwnerReference("parent", kubeflow.SchemeGroupVersionKind).
Obj(),
wantErr: field.ErrorList{
field.Invalid(queueNameLabelPath, "queue name", invalidRFC1123Message),
field.Forbidden(parentWorkloadKeyPath, "this annotation is immutable"),
Expand Down