Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 2 additions & 11 deletions hack/deploy-kueue.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,10 @@

# Installs a kueue release onto an existing cluster

KUEUE_VERSION=v0.7.0

export ROOT_DIR="$(dirname "$(dirname "$(readlink -fn "$0")")")"

echo "Downloading and patching Kueue ${KUEUE_VERSION} manifests"
wget -q https://github.com/kubernetes-sigs/kueue/releases/download/${KUEUE_VERSION}/manifests.yaml -O $ROOT_DIR/hack/kueue-manifest.yaml
patch -p 0 $ROOT_DIR/hack/kueue-manifest.yaml < $ROOT_DIR/hack/kueue-patches/01-manage-all-jobs.txt || exit 1
patch -p 0 $ROOT_DIR/hack/kueue-manifest.yaml < $ROOT_DIR/hack/kueue-patches/02-aw-external-frameworks.txt || exit 1

echo "Deploying Kueue version $KUEUE_VERSION"
kubectl apply --server-side -f $ROOT_DIR/hack/kueue-manifest.yaml

rm -f $ROOT_DIR/hack/kueue-manifest.yaml
echo "Deploying Kueue"
kubectl apply --server-side -k $ROOT_DIR/hack/kueue-config

# Sleep until the kueue manager is running
echo "Waiting for pods in the kueue-system namespace to become ready"
Expand Down
64 changes: 64 additions & 0 deletions hack/kueue-config/controller_manager_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Kueue controller-manager configuration (config.kueue.x-k8s.io/v1beta1).
# kustomization.yaml in this directory loads this file into the
# `manager-config` ConfigMap via its configMapGenerator.
apiVersion: config.kueue.x-k8s.io/v1beta1
kind: Configuration
health:
  healthProbeBindAddress: ":8081"
metrics:
  bindAddress: ":8080"
  # enableClusterQueueResources: true
webhook:
  port: 9443
leaderElection:
  leaderElect: true
  resourceName: c1f6bfd2.kueue.x-k8s.io
controller:
  # Per group/kind concurrency settings for the controller.
  groupKindConcurrency:
    Job.batch: 5
    Pod: 5
    Workload.kueue.x-k8s.io: 5
    LocalQueue.kueue.x-k8s.io: 1
    ClusterQueue.kueue.x-k8s.io: 1
    ResourceFlavor.kueue.x-k8s.io: 1
clientConnection:
  qps: 50
  burst: 100
# pprofBindAddress: :8083
# waitForPodsReady:
#   enable: false
#   timeout: 5m
#   blockAdmission: false
#   requeuingStrategy:
#     timestamp: Eviction
#     backoffLimitCount: null # null indicates infinite requeuing
#     backoffBaseSeconds: 60
#     backoffMaxSeconds: 3600
# NOTE(review): presumably carries over what the deleted
# hack/kueue-patches/01-manage-all-jobs.txt patch used to set - confirm.
manageJobsWithoutQueueName: true
# internalCertManagement:
#   enable: false
#   webhookServiceName: ""
#   webhookSecretName: ""
integrations:
  frameworks:
    - "batch/job"
    - "kubeflow.org/mpijob"
    - "ray.io/rayjob"
    - "ray.io/raycluster"
    - "jobset.x-k8s.io/jobset"
    - "kubeflow.org/mxjob"
    - "kubeflow.org/paddlejob"
    - "kubeflow.org/pytorchjob"
    - "kubeflow.org/tfjob"
    - "kubeflow.org/xgboostjob"
    # - "pod"
  # AppWrapper is listed as an external framework; the ClusterRole patch in
  # kustomization.yaml adds get/list/watch on appwrappers to manager-role.
  externalFrameworks:
    - "AppWrapper.v1beta2.workload.codeflare.dev"
  # podOptions:
  #   namespaceSelector:
  #     matchExpressions:
  #       - key: kubernetes.io/metadata.name
  #         operator: NotIn
  #         values: [ kube-system, kueue-system ]
# fairSharing:
#   enable: true
#   preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
# resources:
#   excludeResourcePrefixes: []
36 changes: 36 additions & 0 deletions hack/kueue-config/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Kustomize overlay that deploys Kueue from the upstream default config,
# substituting this directory's manager configuration and adding AppWrapper
# read permissions to the manager's ClusterRole.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: kueue-system

# Upstream base, pinned by git ref. Keep this ref and `images[].newTag`
# below on the same version.
resources:
  - "https://github.com/kubernetes-sigs/kueue/config/default?ref=v0.7.0"

# `behavior: replace` overrides the `manager-config` ConfigMap coming from
# the base with the local controller_manager_config.yaml.
configMapGenerator:
  - name: manager-config
    namespace: kueue-system
    behavior: replace
    files:
      - controller_manager_config.yaml

# Rewrites references to the staging image name so they point at the
# released image with the pinned tag.
images:
  - name: gcr.io/k8s-staging-kueue/kueue
    newName: registry.k8s.io/kueue/kueue
    newTag: v0.7.0

# JSON6902 patch: appends a rule to ClusterRole `manager-role` granting
# get/list/watch on appwrappers in the workload.codeflare.dev API group.
patches:
  - target:
      kind: ClusterRole
      name: manager-role
    patch: |
      - op: add
        path: /rules/-
        value:
          apiGroups:
          - workload.codeflare.dev
          resources:
          - appwrappers
          verbs:
          - get
          - list
          - watch
11 changes: 0 additions & 11 deletions hack/kueue-patches/01-manage-all-jobs.txt

This file was deleted.

28 changes: 0 additions & 28 deletions hack/kueue-patches/02-aw-external-frameworks.txt

This file was deleted.