Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support Preemption while borrowing #1397

Merged
merged 5 commits into from
Jan 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions apis/kueue/v1beta1/clusterqueue_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,10 @@ type ClusterQueuePreemption struct {
// +kubebuilder:validation:Enum=Never;LowerPriority;Any
ReclaimWithinCohort PreemptionPolicy `json:"reclaimWithinCohort,omitempty"`

// borrowWithinCohort provides configuration to allow preemption within
// cohort while borrowing.
BorrowWithinCohort *BorrowWithinCohort `json:"borrowWithinCohort,omitempty"`

// withinClusterQueue determines whether a pending Workload that doesn't fit
// within the nominal quota for its ClusterQueue, can preempt active Workloads in
// the ClusterQueue. The possible values are:
Expand All @@ -382,6 +386,38 @@ type ClusterQueuePreemption struct {
WithinClusterQueue PreemptionPolicy `json:"withinClusterQueue,omitempty"`
}

// BorrowWithinCohortPolicy names the preemption policy that applies to a
// workload which needs to borrow quota within its cohort.
type BorrowWithinCohortPolicy string

const (
	// BorrowWithinCohortPolicyNever does not allow for preemption, in other
	// ClusterQueues within the cohort, for a borrowing workload.
	BorrowWithinCohortPolicyNever BorrowWithinCohortPolicy = "Never"

	// BorrowWithinCohortPolicyLowerPriority allows preemption, in other
	// ClusterQueues within the cohort, for a borrowing workload, but only if
	// the preempted workloads are of lower priority.
	BorrowWithinCohortPolicyLowerPriority BorrowWithinCohortPolicy = "LowerPriority"
)

// BorrowWithinCohort configures preemption of workloads within the cohort
// on behalf of a workload that is borrowing.
type BorrowWithinCohort struct {
	// policy determines the policy for preemption to reclaim quota within cohort while borrowing.
	// Possible values are:
	// - `Never` (default): do not allow for preemption, in other
	//    ClusterQueues within the cohort, for a borrowing workload.
	// - `LowerPriority`: allow preemption, in other ClusterQueues
	//    within the cohort, for a borrowing workload, but only if
	//    the preempted workloads are of lower priority.
	//
	// +kubebuilder:default=Never
	// +kubebuilder:validation:Enum=Never;LowerPriority
	Policy BorrowWithinCohortPolicy `json:"policy,omitempty"`

	// maxPriorityThreshold allows to restrict the set of workloads which
	// might be preempted by a borrowing workload, to only workloads with
	// priority less than or equal to the specified threshold priority.
	// When the threshold is not specified, then any workload satisfying the
	// policy can be preempted by the borrowing workload.
	//
	// +optional
	MaxPriorityThreshold *int32 `json:"maxPriorityThreshold,omitempty"`
}

//+genclient
//+genclient:nonNamespaced
//+kubebuilder:object:root=true
Expand Down
27 changes: 26 additions & 1 deletion apis/kueue/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions charts/kueue/templates/crd/kueue.x-k8s.io_clusterqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,33 @@ spec:
of Workloads to preempt to accommodate the pending Workload, preempting
Workloads with lower priority first."
properties:
borrowWithinCohort:
description: borrowWithinCohort provides configuration to allow
preemption within cohort while borrowing.
properties:
maxPriorityThreshold:
description: maxPriorityThreshold allows to restrict the set
of workloads which might be preempted by a borrowing workload,
to only workloads with priority less than or equal to the
specified threshold priority. When the threshold is not
specified, then any workload satisfying the policy can be
preempted by the borrowing workload.
format: int32
type: integer
policy:
default: Never
description: 'policy determines the policy for preemption
to reclaim quota within cohort while borrowing. Possible
values are: - `Never` (default): do not allow for preemption,
in other ClusterQueues within the cohort, for a borrowing
workload. - `LowerPriority`: allow preemption, in other
ClusterQueues within the cohort, for a borrowing workload,
but only if the preempted workloads are of lower priority.'
enum:
- Never
- LowerPriority
type: string
type: object
reclaimWithinCohort:
default: Never
description: "reclaimWithinCohort determines whether a pending
Expand Down
51 changes: 51 additions & 0 deletions client-go/applyconfiguration/kueue/v1beta1/borrowwithincohort.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions client-go/applyconfiguration/utils.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions config/components/crd/bases/kueue.x-k8s.io_clusterqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,33 @@ spec:
of Workloads to preempt to accommodate the pending Workload, preempting
Workloads with lower priority first."
properties:
borrowWithinCohort:
description: borrowWithinCohort provides configuration to allow
preemption within cohort while borrowing.
properties:
maxPriorityThreshold:
description: maxPriorityThreshold allows to restrict the set
of workloads which might be preempted by a borrowing workload,
to only workloads with priority less than or equal to the
specified threshold priority. When the threshold is not
specified, then any workload satisfying the policy can be
preempted by the borrowing workload.
format: int32
type: integer
policy:
default: Never
description: 'policy determines the policy for preemption
to reclaim quota within cohort while borrowing. Possible
values are: - `Never` (default): do not allow for preemption,
in other ClusterQueues within the cohort, for a borrowing
workload. - `LowerPriority`: allow preemption, in other
ClusterQueues within the cohort, for a borrowing workload,
but only if the preempted workloads are of lower priority.'
enum:
- Never
- LowerPriority
type: string
type: object
reclaimWithinCohort:
default: Never
description: "reclaimWithinCohort determines whether a pending
Expand Down
27 changes: 21 additions & 6 deletions pkg/scheduler/flavorassigner/flavorassigner.go
Original file line number Diff line number Diff line change
Expand Up @@ -472,7 +472,9 @@ func shouldTryNextFlavor(representativeMode FlavorAssignmentMode, flavorFungibil
policyPreempt := flavorFungibility.WhenCanPreempt
policyBorrow := flavorFungibility.WhenCanBorrow
if representativeMode == Preempt && policyPreempt == kueue.Preempt {
return false
if !needsBorrowing || policyBorrow == kueue.Borrow {
return false
}
}

if representativeMode == Fit && needsBorrowing && policyBorrow == kueue.Borrow {
Expand Down Expand Up @@ -534,11 +536,13 @@ func flavorSelector(spec *corev1.PodSpec, allowedKeys sets.Set[string]) nodeaffi

// fitsResourceQuota returns how this flavor could be assigned to the resource,
// according to the remaining quota in the ClusterQueue and cohort.
// If it fits, also returns any borrowing required.
// If it fits, also returns whether borrowing is required. Similarly, it returns
// whether borrowing is required when preempting.
// If the flavor doesn't satisfy limits immediately (when waiting or preemption
// could help), it returns a Status with reasons.
func fitsResourceQuota(fName kueue.ResourceFlavorReference, rName corev1.ResourceName, val int64, cq *cache.ClusterQueue, rQuota *cache.ResourceQuota) (FlavorAssignmentMode, bool, *Status) {
var status Status
var borrow bool
used := cq.Usage[fName][rName]
mode := NoFit
if val <= rQuota.Nominal {
Expand All @@ -547,16 +551,27 @@ func fitsResourceQuota(fName kueue.ResourceFlavorReference, rName corev1.Resourc
// ClusterQueue are preempted.
mode = Preempt
}
cohortAvailable := rQuota.Nominal
if cq.Cohort != nil {
cohortAvailable = cq.Cohort.RequestableResources[fName][rName]
}

if cq.Preemption.BorrowWithinCohort != nil && cq.Preemption.BorrowWithinCohort.Policy != kueue.BorrowWithinCohortPolicyNever {
// when preemption with borrowing is enabled, we can succeed in admitting the
// workload if preemption is used.
if (rQuota.BorrowingLimit == nil || val <= rQuota.Nominal+*rQuota.BorrowingLimit) && val <= cohortAvailable {
mode = Preempt
borrow = val > rQuota.Nominal
}
}
if rQuota.BorrowingLimit != nil && used+val > rQuota.Nominal+*rQuota.BorrowingLimit {
status.append(fmt.Sprintf("borrowing limit for %s in flavor %s exceeded", rName, fName))
return mode, false, &status
return mode, borrow, &status
mimowo marked this conversation as resolved.
Show resolved Hide resolved
}

cohortUsed := used
cohortAvailable := rQuota.Nominal
if cq.Cohort != nil {
cohortUsed = cq.Cohort.Usage[fName][rName]
cohortAvailable = cq.Cohort.RequestableResources[fName][rName]
}

lack := cohortUsed + val - cohortAvailable
Expand All @@ -574,7 +589,7 @@ func fitsResourceQuota(fName kueue.ResourceFlavorReference, rName corev1.Resourc
}
}
status.append(msg)
return mode, false, &status
return mode, borrow, &status
}

func filterRequestedResources(req workload.Requests, allowList sets.Set[corev1.ResourceName]) workload.Requests {
Expand Down
Loading