From 5f8761b235fdc4a68e7759fe4027d3727863690f Mon Sep 17 00:00:00 2001 From: David Grove Date: Tue, 9 Jul 2024 13:55:14 -0400 Subject: [PATCH] use kustomize to deploy/configure Kueue instead of patching manifest --- hack/deploy-kueue.sh | 13 +--- .../controller_manager_config.yaml | 64 +++++++++++++++++++ hack/kueue-config/kustomization.yaml | 36 +++++++++++ hack/kueue-patches/01-manage-all-jobs.txt | 11 ---- .../02-aw-external-frameworks.txt | 28 -------- 5 files changed, 102 insertions(+), 50 deletions(-) create mode 100644 hack/kueue-config/controller_manager_config.yaml create mode 100644 hack/kueue-config/kustomization.yaml delete mode 100644 hack/kueue-patches/01-manage-all-jobs.txt delete mode 100644 hack/kueue-patches/02-aw-external-frameworks.txt diff --git a/hack/deploy-kueue.sh b/hack/deploy-kueue.sh index faa858a..ea04d6c 100755 --- a/hack/deploy-kueue.sh +++ b/hack/deploy-kueue.sh @@ -14,19 +14,10 @@ # Installs a kueue release onto an existing cluster -KUEUE_VERSION=v0.7.0 - export ROOT_DIR="$(dirname "$(dirname "$(readlink -fn "$0")")")" -echo "Downloading and patching Kueue ${KUEUE_VERSION} manifests" -wget -q https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml -O $ROOT_DIR/hack/kueue-manifest.yaml -patch -p 0 $ROOT_DIR/hack/kueue-manifest.yaml < $ROOT_DIR/hack/kueue-patches/01-manage-all-jobs.txt || exit 1 -patch -p 0 $ROOT_DIR/hack/kueue-manifest.yaml < $ROOT_DIR/hack/kueue-patches/02-aw-external-frameworks.txt || exit 1 - -echo "Deploying Kueue version $KUEUE_VERSION" -kubectl apply --server-side -f $ROOT_DIR/hack/kueue-manifest.yaml - -rm -f $ROOT_DIR/hack/kueue-manifest.yaml +echo "Deploying Kueue" +kubectl apply --server-side -k $ROOT_DIR/hack/kueue-config # Sleep until the kueue manager is running echo "Waiting for pods in the kueue-system namespace to become ready" diff --git a/hack/kueue-config/controller_manager_config.yaml b/hack/kueue-config/controller_manager_config.yaml new file mode 100644 index 0000000..96966b7 --- /dev/null +++ b/hack/kueue-config/controller_manager_config.yaml @@ -0,0 +1,64 @@ +apiVersion: config.kueue.x-k8s.io/v1beta1 +kind: Configuration +health: + healthProbeBindAddress: :8081 +metrics: + bindAddress: :8080 +# enableClusterQueueResources: true +webhook: + port: 9443 +leaderElection: + leaderElect: true + resourceName: c1f6bfd2.kueue.x-k8s.io +controller: + groupKindConcurrency: + Job.batch: 5 + Pod: 5 + Workload.kueue.x-k8s.io: 5 + LocalQueue.kueue.x-k8s.io: 1 + ClusterQueue.kueue.x-k8s.io: 1 + ResourceFlavor.kueue.x-k8s.io: 1 +clientConnection: + qps: 50 + burst: 100 +#pprofBindAddress: :8083 +#waitForPodsReady: +# enable: false +# timeout: 5m +# blockAdmission: false +# requeuingStrategy: +# timestamp: Eviction +# backoffLimitCount: null # null indicates infinite requeuing +# backoffBaseSeconds: 60 +# backoffMaxSeconds: 3600 +manageJobsWithoutQueueName: true +#internalCertManagement: +# enable: false +# webhookServiceName: "" +# webhookSecretName: "" +integrations: + frameworks: + - "batch/job" + - "kubeflow.org/mpijob" + - "ray.io/rayjob" + - "ray.io/raycluster" + - "jobset.x-k8s.io/jobset" + - "kubeflow.org/mxjob" + - "kubeflow.org/paddlejob" + - "kubeflow.org/pytorchjob" + - "kubeflow.org/tfjob" + - "kubeflow.org/xgboostjob" + # - "pod" + externalFrameworks: + - "AppWrapper.v1beta2.workload.codeflare.dev" +# podOptions: +# namespaceSelector: +# matchExpressions: +# - key: kubernetes.io/metadata.name +# operator: NotIn +# values: [ kube-system, kueue-system ] +#fairSharing: +# enable: true +# preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare] +#resources: +# excludeResourcePrefixes: [] diff --git a/hack/kueue-config/kustomization.yaml b/hack/kueue-config/kustomization.yaml new file mode 100644 index 0000000..1578f82 --- /dev/null +++ b/hack/kueue-config/kustomization.yaml @@ -0,0 +1,36 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: kueue-system + +resources: +- "https://github.com/kubernetes-sigs/kueue/config/default?ref=v0.7.0" + +configMapGenerator: +- name: manager-config + namespace: kueue-system + behavior: replace + files: + - controller_manager_config.yaml + +images: +- name: gcr.io/k8s-staging-kueue/kueue + newName: registry.k8s.io/kueue/kueue + newTag: v0.7.0 + +patches: +- target: + kind: ClusterRole + name: manager-role + patch: | + - op: add + path: /rules/- + value: + apiGroups: + - workload.codeflare.dev + resources: + - appwrappers + verbs: + - get + - list + - watch diff --git a/hack/kueue-patches/01-manage-all-jobs.txt b/hack/kueue-patches/01-manage-all-jobs.txt deleted file mode 100644 index ee11c34..0000000 --- a/hack/kueue-patches/01-manage-all-jobs.txt +++ /dev/null @@ -1,11 +0,0 @@ ---- manifests.yaml 2024-06-26 13:58:48.132795505 -0400 -+++ manifests.yaml 2024-06-26 13:59:54.945553273 -0400 -@@ -11878,7 +11878,7 @@ - # backoffLimitCount: null # null indicates infinite requeuing - # backoffBaseSeconds: 60 - # backoffMaxSeconds: 3600 -- #manageJobsWithoutQueueName: true -+ manageJobsWithoutQueueName: true - #internalCertManagement: - # enable: false - # webhookServiceName: "" diff --git a/hack/kueue-patches/02-aw-external-frameworks.txt b/hack/kueue-patches/02-aw-external-frameworks.txt deleted file mode 100644 index 095e979..0000000 --- a/hack/kueue-patches/02-aw-external-frameworks.txt +++ /dev/null @@ -1,28 +0,0 @@ ---- manifests.yaml 2024-06-26 13:59:54.945553273 -0400 -+++ manifests.yaml 2024-06-26 14:02:25.889855296 -0400 -@@ -11225,6 +11225,14 @@ - - get - - list - - watch -+- apiGroups: -+ - workload.codeflare.dev -+ resources: -+ - appwrappers -+ verbs: -+ - get -+ - list -+ - watch - --- - apiVersion: rbac.authorization.k8s.io/v1 - kind: ClusterRole -@@ -11896,8 +11904,8 @@ - - "kubeflow.org/tfjob" - - "kubeflow.org/xgboostjob" - # - "pod" -- # externalFrameworks: -- # - "Foo.v1.example.com" -+ externalFrameworks: -+ - "AppWrapper.v1beta2.workload.codeflare.dev" - # podOptions: - # namespaceSelector: - # matchExpressions: