Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add APIs for configuring fair sharing #2070

Merged
merged 10 commits into from
May 8, 2024
34 changes: 34 additions & 0 deletions apis/config/v1beta1/configuration_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,9 @@ type Configuration struct {

// MultiKueue controls the behaviour of the MultiKueue AdmissionCheck Controller.
MultiKueue *MultiKueue `json:"multiKueue,omitempty"`

// FairSharing controls the fair sharing semantics across the cluster.
FairSharing *FairSharing `json:"fairSharing,omitempty"`
}

type ControllerManager struct {
Expand Down Expand Up @@ -352,3 +355,34 @@ type ClusterQueueVisibility struct {
// Defaults to 10.
MaxCount int32 `json:"maxCount,omitempty"`
}

// PreemptionStrategy names a constraint that a preemption must satisfy under
// fair sharing. It is the element type of FairSharing.PreemptionStrategies.
type PreemptionStrategy string

// Possible values of PreemptionStrategy.
const (
// LessThanOrEqualToFinalShare only preempts a workload if the share of the
// preemptor CQ with the preemptor workload is less than or equal to the share
// of the preemptee CQ without the workload to be preempted.
LessThanOrEqualToFinalShare PreemptionStrategy = "LessThanOrEqualToFinalShare"
// LessThanInitialShare only preempts a workload if the share of the preemptor
// CQ with the incoming workload is strictly less than the share of the
// preemptee CQ.
LessThanInitialShare PreemptionStrategy = "LessThanInitialShare"
alculquicondor marked this conversation as resolved.
Show resolved Hide resolved
)

// FairSharing controls the fair sharing semantics across the cluster.
type FairSharing struct {
// enable indicates whether to enable fair sharing for all cohorts.
// Defaults to false.
Enable bool `json:"enable"`

// preemptionStrategies indicates which constraints a preemption should satisfy.
// The preemption algorithm will only use the next strategy in the list if the
// incoming workload (preemptor) doesn't fit after using the previous strategies.
// Possible values are:
// - LessThanOrEqualToFinalShare: Only preempt a workload if the share of the preemptor CQ
// with the preemptor workload is less than or equal to the share of the preemptee CQ
// without the workload to be preempted.
// This strategy might favor preemption of smaller workloads in the preemptee CQ,
// regardless of priority or start time, in an effort to keep the share of the CQ
// as high as possible.
// - LessThanInitialShare: Only preempt a workload if the share of the preemptor CQ
// with the incoming workload is strictly less than the share of the preemptee CQ.
// This strategy doesn't depend on the share usage of the workload being preempted.
// As a result, the strategy chooses to preempt workloads with the lowest priority and
// newest start time first.
// The default strategy is ["LessThanOrEqualToFinalShare", "LessThanInitialShare"].
PreemptionStrategies []PreemptionStrategy `json:"preemptionStrategies,omitempty"`
alculquicondor marked this conversation as resolved.
Show resolved Hide resolved
alculquicondor marked this conversation as resolved.
Show resolved Hide resolved
}
3 changes: 3 additions & 0 deletions apis/config/v1beta1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,7 @@ func SetDefaults_Configuration(cfg *Configuration) {
if cfg.MultiKueue.WorkerLostTimeout == nil {
cfg.MultiKueue.WorkerLostTimeout = &metav1.Duration{Duration: DefaultMultiKueueWorkerLostTimeout}
}
if fs := cfg.FairSharing; fs != nil && fs.Enable && len(fs.PreemptionStrategies) == 0 {
fs.PreemptionStrategies = []PreemptionStrategy{LessThanOrEqualToFinalShare, LessThanInitialShare}
}
}
25 changes: 25 additions & 0 deletions apis/config/v1beta1/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,31 @@ func TestSetDefaults_Configuration(t *testing.T) {
},
},
},
"add default fair sharing configuration when enabled": {
original: &Configuration{
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
FairSharing: &FairSharing{
Enable: true,
},
},
want: &Configuration{
Namespace: ptr.To(DefaultNamespace),
ControllerManager: defaultCtrlManagerConfigurationSpec,
InternalCertManagement: &InternalCertManagement{
Enable: ptr.To(false),
},
ClientConnection: defaultClientConnection,
Integrations: defaultIntegrations,
QueueVisibility: defaultQueueVisibility,
MultiKueue: defaultMultiKueue,
FairSharing: &FairSharing{
Enable: true,
PreemptionStrategies: []PreemptionStrategy{LessThanOrEqualToFinalShare, LessThanInitialShare},
},
},
},
}

for name, tc := range testCases {
Expand Down
25 changes: 25 additions & 0 deletions apis/config/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

18 changes: 18 additions & 0 deletions apis/kueue/v1beta1/clusterqueue_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ type ClusterQueueSpec struct {
// +kubebuilder:validation:Enum=None;Hold;HoldAndDrain
// +kubebuilder:default="None"
StopPolicy *StopPolicy `json:"stopPolicy,omitempty"`

// fairSharing defines the properties of the ClusterQueue when participating in fair sharing.
// The values are only relevant if fair sharing is enabled in the Kueue configuration.
FairSharing *FairSharing `json:"fairSharing,omitempty"`
}

// AdmissionCheckStrategy defines a strategy for a AdmissionCheck.
Expand Down Expand Up @@ -464,6 +468,20 @@ type BorrowWithinCohort struct {
MaxPriorityThreshold *int32 `json:"maxPriorityThreshold,omitempty"`
}

// FairSharing contains the properties of the ClusterQueue when participating in fair sharing.
type FairSharing struct {
// NOTE(review): the CRD schema pattern generated for this field accepts an
// optional leading "-", so negative weights do not appear to be rejected at
// the schema level — confirm whether they are validated elsewhere.

// weight gives a comparative advantage to this ClusterQueue when competing for unused
// resources in the cohort against other ClusterQueues.
// The share of a ClusterQueue is based on the dominant resource usage above nominal
// quotas for each resource, divided by the weight.
// Admission prioritizes scheduling workloads from ClusterQueues with the lowest share
// and preempting workloads from the ClusterQueues with the highest share.
alculquicondor marked this conversation as resolved.
Show resolved Hide resolved
// A zero weight implies infinite share value, meaning that this ClusterQueue will always
// be at disadvantage against other ClusterQueues.
// +kubebuilder:default=1
Weight *resource.Quantity `json:"weight,omitempty"`
alculquicondor marked this conversation as resolved.
Show resolved Hide resolved
}

// +genclient
// +genclient:nonNamespaced
// +kubebuilder:object:root=true
Expand Down
25 changes: 25 additions & 0 deletions apis/kueue/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions charts/kueue/templates/crd/kueue.x-k8s.io_clusterqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,28 @@ spec:
maxLength: 253
pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
type: string
fairSharing:
description: |-
fairSharing defines the properties of the ClusterQueue when participating in fair sharing.
The values are only relevant if fair sharing is enabled in the Kueue configuration.
properties:
weight:
anyOf:
- type: integer
- type: string
default: 1
description: |-
weight gives a comparative advantage to this ClusterQueue when competing for unused
resources in the cohort against other ClusterQueues.
The share of a ClusterQueue is based on the dominant resource usage above nominal
quotas for each resource, divided by the weight.
Admission prioritizes scheduling workloads from ClusterQueues with the lowest share
and preempting workloads from the ClusterQueues with the highest share.
A zero weight implies infinite share value, meaning that this ClusterQueue will always
be at disadvantage against other ClusterQueues.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
flavorFungibility:
default: {}
description: |-
Expand Down
3 changes: 3 additions & 0 deletions charts/kueue/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ managerConfig:
# - key: kubernetes.io/metadata.name
# operator: NotIn
# values: [ kube-system, kueue-system ]
# fairSharing:
# enable: true
# preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
# ports definition for metricsService and webhookService.
metricsService:
ports:
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

42 changes: 42 additions & 0 deletions client-go/applyconfiguration/kueue/v1beta1/fairsharing.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions client-go/applyconfiguration/utils.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions cmd/kueue/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ func setupScheduler(mgr ctrl.Manager, cCache *cache.Cache, queues *queue.Manager
mgr.GetClient(),
mgr.GetEventRecorderFor(constants.AdmissionName),
scheduler.WithPodsReadyRequeuingTimestamp(podsReadyRequeuingTimestamp(cfg)),
scheduler.WithFairSharing(cfg.FairSharing),
)
if err := mgr.Add(sched); err != nil {
setupLog.Error(err, "Unable to add scheduler to manager")
Expand Down
22 changes: 22 additions & 0 deletions config/components/crd/bases/kueue.x-k8s.io_clusterqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,28 @@ spec:
maxLength: 253
pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$
type: string
fairSharing:
description: |-
fairSharing defines the properties of the ClusterQueue when participating in fair sharing.
The values are only relevant if fair sharing is enabled in the Kueue configuration.
properties:
weight:
anyOf:
- type: integer
- type: string
default: 1
description: |-
weight gives a comparative advantage to this ClusterQueue when competing for unused
resources in the cohort against other ClusterQueues.
The share of a ClusterQueue is based on the dominant resource usage above nominal
quotas for each resource, divided by the weight.
Admission prioritizes scheduling workloads from ClusterQueues with the lowest share
and preempting workloads from the ClusterQueues with the highest share.
A zero weight implies infinite share value, meaning that this ClusterQueue will always
be at disadvantage against other ClusterQueues.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
type: object
flavorFungibility:
default: {}
description: |-
Expand Down
3 changes: 3 additions & 0 deletions config/components/manager/controller_manager_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,6 @@ integrations:
# - key: kubernetes.io/metadata.name
# operator: NotIn
# values: [ kube-system, kueue-system ]
# fairSharing:
# enable: true
# preemptionStrategies: [LessThanOrEqualToFinalShare, LessThanInitialShare]
Loading