Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions api/v1beta2/appwrapper_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ type AppWrapperPodSet struct {

// Path is the path Component.Template to the PodTemplateSpec for this PodSet
Path string `json:"path"`

// Annotations is an unstructured key value map that may be used to store and retrieve
// arbitrary metadata about the PodSet to customize its treatment by the AppWrapper controller.
//+optional
Annotations map[string]string `json:"annotations,omitempty"`
}

// AppWrapperPodSetInfo contains the data that Kueue wants to inject into an admitted PodSpecTemplate
Expand Down
7 changes: 7 additions & 0 deletions api/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions config/crd/bases/workload.codeflare.dev_appwrappers.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,13 @@ spec:
description: AppWrapperPodSet describes an homogeneous set
of pods
properties:
annotations:
additionalProperties:
type: string
description: |-
Annotations is an unstructured key value map that may be used to store and retrieve
arbitrary metadata about the PodSet to customize its treatment by the AppWrapper controller.
type: object
path:
description: Path is the path Component.Template to the
PodTemplateSpec for this PodSet
Expand Down Expand Up @@ -280,6 +287,13 @@ spec:
description: AppWrapperPodSet describes an homogeneous set
of pods
properties:
annotations:
additionalProperties:
type: string
description: |-
Annotations is an unstructured key value map that may be used to store and retrieve
arbitrary metadata about the PodSet to customize its treatment by the AppWrapper controller.
type: object
path:
description: Path is the path Component.Template to the
PodTemplateSpec for this PodSet
Expand Down
12 changes: 6 additions & 6 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ go 1.23.0
require (
github.com/distribution/reference v0.5.0
github.com/go-logr/logr v1.4.2
github.com/golangci/golangci-lint v1.60.1
github.com/golangci/golangci-lint v1.63.4
github.com/kubeflow/training-operator v1.8.1
github.com/onsi/ginkgo/v2 v2.22.0
github.com/onsi/gomega v1.36.1
github.com/open-policy-agent/cert-controller v0.12.0
Expand All @@ -17,6 +18,7 @@ require (
k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
sigs.k8s.io/controller-runtime v0.19.3
sigs.k8s.io/controller-tools v0.16.5
sigs.k8s.io/jobset v0.7.1
sigs.k8s.io/kueue v0.10.1
sigs.k8s.io/kustomize/kustomize/v5 v5.5.0
sigs.k8s.io/yaml v1.4.0
Expand Down Expand Up @@ -61,7 +63,6 @@ require (
github.com/json-iterator/go v1.1.12 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/kubeflow/mpi-operator v0.6.0 // indirect
github.com/kubeflow/training-operator v1.8.1 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
Expand Down Expand Up @@ -90,16 +91,16 @@ require (
go.opentelemetry.io/proto/otlp v1.3.1 // indirect
go.uber.org/atomic v1.11.0 // indirect
go.uber.org/multierr v1.11.0 // indirect
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect
golang.org/x/mod v0.21.0 // indirect
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 // indirect
golang.org/x/mod v0.22.0 // indirect
golang.org/x/net v0.33.0 // indirect
golang.org/x/oauth2 v0.21.0 // indirect
golang.org/x/sync v0.10.0 // indirect
golang.org/x/sys v0.28.0 // indirect
golang.org/x/term v0.27.0 // indirect
golang.org/x/text v0.21.0 // indirect
golang.org/x/time v0.6.0 // indirect
golang.org/x/tools v0.26.0 // indirect
golang.org/x/tools v0.28.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20240528184218-531527333157 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
Expand All @@ -116,7 +117,6 @@ require (
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20240812233141-91dab695df6f // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.30.3 // indirect
sigs.k8s.io/jobset v0.7.1 // indirect
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect
sigs.k8s.io/kustomize/api v0.18.0 // indirect
sigs.k8s.io/kustomize/kyaml v0.18.1 // indirect
Expand Down
24 changes: 12 additions & 12 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golangci/golangci-lint v1.60.1 h1:DRKNqNTQRLBJZ1il5u4fvgLQCjQc7QFs0DbhksJtVJE=
github.com/golangci/golangci-lint v1.60.1/go.mod h1:jDIPN1rYaIA+ijp9OZcUmUCoQOtZ76pOlFbi15FlLJY=
github.com/golangci/golangci-lint v1.63.4 h1:bJQFQ3hSfUto597dkL7ipDzOxsGEpiWdLiZ359OWOBI=
github.com/golangci/golangci-lint v1.63.4/go.mod h1:Hx0B7Lg5/NXbaOHem8+KU+ZUIzMI6zNj/7tFwdnn10I=
github.com/google/cel-go v0.20.1 h1:nDx9r8S3L4pE61eDdt8igGj8rf5kjYR3ILxWIpWNi84=
github.com/google/cel-go v0.20.1/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg=
github.com/google/gnostic-models v0.6.8 h1:yo/ABAfM5IMRsS1VnXjTBvUb61tFIHozhlYvRgGre9I=
Expand Down Expand Up @@ -143,8 +143,8 @@ github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0leargg
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/ray-project/kuberay/ray-operator v1.2.2 h1:wj4qe9SmJfD1ubgEaVPuAsnU/WFDvremzR8j3JslBdk=
github.com/ray-project/kuberay/ray-operator v1.2.2/go.mod h1:osTiIyaDoWi5IN1f0tOOtZ4TzVf+5kJXZor8VFvcEiI=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
Expand All @@ -166,8 +166,8 @@ github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg=
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM=
github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg=
github.com/xlab/treeprint v1.2.0 h1:HzHnuAF1plUN2zGlAFHbSQP2qJ0ZAD3XF5XD7OesXRQ=
Expand Down Expand Up @@ -201,12 +201,12 @@ go.uber.org/zap v1.27.0/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8=
golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY=
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk=
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.21.0 h1:vvrHzRwRfVKSiLrG+d4FMl/Qi4ukBCE6kZlTUkDYRT0=
golang.org/x/mod v0.21.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/mod v0.22.0 h1:D4nJWe9zXqHOmWqj4VMOJhvzj7bEZg4wEYa759z1pH4=
golang.org/x/mod v0.22.0/go.mod h1:6SkKJ3Xj0I0BrPOZoBy3bdMptDDU9oJrpohJ3eWZ1fY=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
Expand Down Expand Up @@ -240,8 +240,8 @@ golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGm
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ=
golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0=
golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8=
golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
Expand Down
48 changes: 40 additions & 8 deletions pkg/utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,14 @@ import (

dockerref "github.com/distribution/reference"

kftraining "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
batchv1 "k8s.io/api/batch/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/utils/ptr"
jobsetapi "sigs.k8s.io/jobset/api/jobset/v1alpha2"

kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
"sigs.k8s.io/kueue/pkg/podset"
Expand All @@ -43,6 +46,12 @@ import (

const templateString = "template"

const (
PodSetAnnotationTASPodIndexLabel = "workload.codeflare.dev.appwrapper/tas-pod-index-label"
PodSetAnnotationTASSubGroupIndexLabel = "workload.codeflare.dev.appwrapper/tas-sub-group-index-label"
PodSetAnnotationTASSubGroupCount = "workload.codeflare.dev.appwrapper/tas-sub-group-count"
)

// GetPodTemplateSpec extracts a Kueue-compatible PodTemplateSpec at the given path within obj
func GetPodTemplateSpec(obj *unstructured.Unstructured, path string) (*v1.PodTemplateSpec, error) {
candidatePTS, err := GetRawTemplate(obj.UnstructuredContent(), path)
Expand Down Expand Up @@ -490,7 +499,13 @@ func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperP
if completions, err := GetReplicas(obj, "template.spec.completions"); err == nil && completions < replicas {
replicas = completions
}
podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{Replicas: ptr.To(replicas), Path: "template.spec.template"})
podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{
Replicas: ptr.To(replicas),
Path: "template.spec.template",
Annotations: map[string]string{
PodSetAnnotationTASPodIndexLabel: batchv1.JobCompletionIndexAnnotation,
},
})

case schema.GroupVersionKind{Group: "jobset.x-k8s.io", Version: "v1alpha2", Kind: "JobSet"}:
if jobs, err := getValueAtPath(obj.UnstructuredContent(), "template.spec.replicatedJobs"); err == nil {
Expand All @@ -499,15 +514,26 @@ func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperP
jobSpecPrefix := fmt.Sprintf("template.spec.replicatedJobs[%v].", i)
// validate path to replica template
if _, err := getValueAtPath(obj.UnstructuredContent(), jobSpecPrefix+"template"); err == nil {
var replicas int32 = 1
var podCount int32 = 1
if parallelism, err := GetReplicas(obj, jobSpecPrefix+"template.spec.parallelism"); err == nil {
replicas = parallelism
podCount = parallelism
}
if completions, err := GetReplicas(obj, jobSpecPrefix+"template.spec.completions"); err == nil && completions < replicas {
replicas = completions
if completions, err := GetReplicas(obj, jobSpecPrefix+"template.spec.completions"); err == nil && completions < podCount {
podCount = completions
}
var replicas int32 = 1
if r, err := GetReplicas(obj, jobSpecPrefix+"replicas"); err == nil {
replicas = r
}
// infer replica count
podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{Replicas: ptr.To(replicas), Path: jobSpecPrefix + "template.spec.template"})
podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{
Replicas: ptr.To(replicas * podCount),
Path: jobSpecPrefix + "template.spec.template",
Annotations: map[string]string{
PodSetAnnotationTASPodIndexLabel: batchv1.JobCompletionIndexAnnotation,
PodSetAnnotationTASSubGroupIndexLabel: jobsetapi.JobIndexKey,
PodSetAnnotationTASSubGroupCount: strconv.Itoa(int(replicas)),
},
})
}
}
}
Expand All @@ -523,7 +549,13 @@ func InferPodSets(obj *unstructured.Unstructured) ([]workloadv1beta2.AppWrapperP
if err != nil {
return nil, err
}
podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{Replicas: ptr.To(replicas), Path: prefix + templateString})
podSets = append(podSets, workloadv1beta2.AppWrapperPodSet{
Replicas: ptr.To(replicas),
Path: prefix + templateString,
Annotations: map[string]string{
PodSetAnnotationTASPodIndexLabel: kftraining.ReplicaIndexLabel,
},
})
}
}

Expand Down
5 changes: 3 additions & 2 deletions samples/wrapped-jobset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,11 @@ spec:
spec:
replicatedJobs:
- name: workers
replicas: 2
template:
spec:
parallelism: 4
completions: 4
parallelism: 2
completions: 2
backoffLimit: 0
template:
spec:
Expand Down
2 changes: 1 addition & 1 deletion samples/wrapped-pytorch-job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ spec:
requests:
cpu: 1
Worker:
replicas: 1
replicas: 2
restartPolicy: OnFailure
template:
spec:
Expand Down
Loading