generated from kubernetes/kubernetes-template-project
/
clusterqueue_types.go
465 lines (403 loc) · 18.9 KB
/
clusterqueue_types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1beta1
import (
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
// ClusterQueueSpec defines the desired state of ClusterQueue
type ClusterQueueSpec struct {
// resourceGroups describes groups of resources.
// Each resource group defines the list of resources and a list of flavors
// that provide quotas for these resources.
// Each resource and each flavor can only form part of one resource group.
// resourceGroups can be up to 16.
// +listType=atomic
// +kubebuilder:validation:MaxItems=16
ResourceGroups []ResourceGroup `json:"resourceGroups,omitempty"`
// cohort that this ClusterQueue belongs to. CQs that belong to the
// same cohort can borrow unused resources from each other.
//
// A CQ can be a member of a single borrowing cohort. A workload submitted
// to a queue referencing this CQ can borrow quota from any CQ in the cohort.
// Only quota for the [resource, flavor] pairs listed in the CQ can be
// borrowed.
// If empty, this ClusterQueue cannot borrow from any other ClusterQueue and
// vice versa.
//
// A cohort is a name that links CQs together, but it doesn't reference any
// object.
//
// Validation of a cohort name is equivalent to that of object names:
// subdomain in DNS (RFC 1123).
Cohort string `json:"cohort,omitempty"`
// QueueingStrategy indicates the queueing strategy of the workloads
// across the queues in this ClusterQueue. This field is immutable.
// Current Supported Strategies:
//
// - StrictFIFO: workloads are ordered strictly by creation time.
// Older workloads that can't be admitted will block admitting newer
// workloads even if they fit available quota.
// - BestEffortFIFO: workloads are ordered by creation time,
// however older workloads that can't be admitted will not block
// admitting newer workloads that fit existing quota.
//
// +kubebuilder:default=BestEffortFIFO
// +kubebuilder:validation:Enum=StrictFIFO;BestEffortFIFO
QueueingStrategy QueueingStrategy `json:"queueingStrategy,omitempty"`
// namespaceSelector defines which namespaces are allowed to submit workloads to
// this clusterQueue. Beyond this basic support for policy, a policy agent like
// Gatekeeper should be used to enforce more advanced policies.
// Defaults to null which is a nothing selector (no namespaces eligible).
// If set to an empty selector `{}`, then all namespaces are eligible.
NamespaceSelector *metav1.LabelSelector `json:"namespaceSelector,omitempty"`
// flavorFungibility defines whether a workload should try the next flavor
// before borrowing or preempting in the flavor being evaluated.
FlavorFungibility *FlavorFungibility `json:"flavorFungibility,omitempty"`
// preemption describes policies to preempt Workloads from this ClusterQueue
// or the ClusterQueue's cohort.
//
// Preemption can happen in two scenarios:
//
// - When a Workload fits within the nominal quota of the ClusterQueue, but
// the quota is currently borrowed by other ClusterQueues in the cohort.
// Preempting Workloads in other ClusterQueues allows this ClusterQueue to
// reclaim its nominal quota.
// - When a Workload doesn't fit within the nominal quota of the ClusterQueue
// and there are admitted Workloads in the ClusterQueue with lower priority.
//
// The preemption algorithm tries to find a minimal set of Workloads to
// preempt to accomomdate the pending Workload, preempting Workloads with
// lower priority first.
Preemption *ClusterQueuePreemption `json:"preemption,omitempty"`
// admissionChecks lists the AdmissionChecks required by this ClusterQueue
// +optional
AdmissionChecks []string `json:"admissionChecks,omitempty"`
// stopPolicy - if set to a value different from None, the ClusterQueue is considered Inactive, no new reservation being
// made.
//
// Depending on its value, its associated workloads will:
//
// - None - Workloads are admitted
// - HoldAndDrain - Admitted workloads are evicted and Reserving workloads will cancel the reservation.
// - Hold - Admitted workloads will run to completion and Reserving workloads will cancel the reservation.
//
// +optional
// +kubebuilder:validation:Enum=None;Hold;HoldAndDrain
// +kubebuilder:default="None"
StopPolicy *StopPolicy `json:"stopPolicy,omitempty"`
}
type QueueingStrategy string
const (
// StrictFIFO means that workloads of the same priority are ordered strictly by creation time.
// Older workloads that can't be admitted will block admitting newer
// workloads even if they fit available quota.
StrictFIFO QueueingStrategy = "StrictFIFO"
// BestEffortFIFO means that workloads of the same priority are ordered by creation time,
// however older workloads that can't be admitted will not block
// admitting newer workloads that fit existing quota.
BestEffortFIFO QueueingStrategy = "BestEffortFIFO"
)
type StopPolicy string
const (
None StopPolicy = "None"
HoldAndDrain StopPolicy = "HoldAndDrain"
Hold StopPolicy = "Hold"
)
type ResourceGroup struct {
// coveredResources is the list of resources covered by the flavors in this
// group.
// Examples: cpu, memory, vendor.com/gpu.
// The list cannot be empty and it can contain up to 16 resources.
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=16
CoveredResources []corev1.ResourceName `json:"coveredResources"`
// flavors is the list of flavors that provide the resources of this group.
// Typically, different flavors represent different hardware models
// (e.g., gpu models, cpu architectures) or pricing models (on-demand vs spot
// cpus).
// Each flavor MUST list all the resources listed for this group in the same
// order as the .resources field.
// The list cannot be empty and it can contain up to 16 flavors.
// +listType=map
// +listMapKey=name
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=16
Flavors []FlavorQuotas `json:"flavors"`
}
type FlavorQuotas struct {
// name of this flavor. The name should match the .metadata.name of a
// ResourceFlavor. If a matching ResourceFlavor does not exist, the
// ClusterQueue will have an Active condition set to False.
Name ResourceFlavorReference `json:"name"`
// resources is the list of quotas for this flavor per resource.
// There could be up to 16 resources.
// +listType=map
// +listMapKey=name
// +kubebuilder:validation:MinItems=1
// +kubebuilder:validation:MaxItems=16
Resources []ResourceQuota `json:"resources"`
}
type ResourceQuota struct {
// name of this resource.
Name corev1.ResourceName `json:"name"`
// nominalQuota is the quantity of this resource that is available for
// Workloads admitted by this ClusterQueue at a point in time.
// The nominalQuota must be non-negative.
// nominalQuota should represent the resources in the cluster available for
// running jobs (after discounting resources consumed by system components
// and pods not managed by kueue). In an autoscaled cluster, nominalQuota
// should account for resources that can be provided by a component such as
// Kubernetes cluster-autoscaler.
//
// If the ClusterQueue belongs to a cohort, the sum of the quotas for each
// (flavor, resource) combination defines the maximum quantity that can be
// allocated by a ClusterQueue in the cohort.
NominalQuota resource.Quantity `json:"nominalQuota"`
// borrowingLimit is the maximum amount of quota for the [flavor, resource]
// combination that this ClusterQueue is allowed to borrow from the unused
// quota of other ClusterQueues in the same cohort.
// In total, at a given time, Workloads in a ClusterQueue can consume a
// quantity of quota equal to nominalQuota+borrowingLimit, assuming the other
// ClusterQueues in the cohort have enough unused quota.
// If null, it means that there is no borrowing limit.
// If not null, it must be non-negative.
// borrowingLimit must be null if spec.cohort is empty.
// +optional
BorrowingLimit *resource.Quantity `json:"borrowingLimit,omitempty"`
// lendingLimit is the maximum amount of unused quota for the [flavor, resource]
// combination that this ClusterQueue can lend to other ClusterQueues in the same cohort.
// In total, at a given time, ClusterQueue reserves for its exclusive use
// a quantity of quota equals to nominalQuota - lendingLimit.
// If null, it means that there is no lending limit, meaning that
// all the nominalQuota can be borrowed by other clusterQueues in the cohort.
// If not null, it must be non-negative.
// lendingLimit must be null if spec.cohort is empty.
// This field is in alpha stage. To be able to use this field,
// enable the feature gate LendingLimit, which is disabled by default.
// +optional
LendingLimit *resource.Quantity `json:"lendingLimit,omitempty"`
}
// ResourceFlavorReference is the name of the ResourceFlavor.
type ResourceFlavorReference string
// ClusterQueueStatus defines the observed state of ClusterQueue
type ClusterQueueStatus struct {
// flavorsReservation are the reserved quotas, by flavor, currently in use by the
// workloads assigned to this ClusterQueue.
// +listType=map
// +listMapKey=name
// +kubebuilder:validation:MaxItems=16
// +optional
FlavorsReservation []FlavorUsage `json:"flavorsReservation"`
// flavorsUsage are the used quotas, by flavor, currently in use by the
// workloads admitted in this ClusterQueue.
// +listType=map
// +listMapKey=name
// +kubebuilder:validation:MaxItems=16
// +optional
FlavorsUsage []FlavorUsage `json:"flavorsUsage"`
// pendingWorkloads is the number of workloads currently waiting to be
// admitted to this clusterQueue.
// +optional
PendingWorkloads int32 `json:"pendingWorkloads"`
// reservingWorkloads is the number of workloads currently reserving quota in this
// clusterQueue.
// +optional
ReservingWorkloads int32 `json:"reservingWorkloads"`
// admittedWorkloads is the number of workloads currently admitted to this
// clusterQueue and haven't finished yet.
// +optional
AdmittedWorkloads int32 `json:"admittedWorkloads"`
// conditions hold the latest available observations of the ClusterQueue
// current state.
// +optional
// +listType=map
// +listMapKey=type
// +patchStrategy=merge
// +patchMergeKey=type
Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"`
// PendingWorkloadsStatus contains the information exposed about the current
// status of the pending workloads in the cluster queue.
// +optional
PendingWorkloadsStatus *ClusterQueuePendingWorkloadsStatus `json:"pendingWorkloadsStatus"`
}
type ClusterQueuePendingWorkloadsStatus struct {
// Head contains the list of top pending workloads.
// +listType=atomic
// +optional
Head []ClusterQueuePendingWorkload `json:"clusterQueuePendingWorkload"`
// LastChangeTime indicates the time of the last change of the structure.
LastChangeTime metav1.Time `json:"lastChangeTime"`
}
// ClusterQueuePendingWorkload contains the information identifying a pending workload
// in the cluster queue.
type ClusterQueuePendingWorkload struct {
// Name indicates the name of the pending workload.
Name string `json:"name"`
// Namespace indicates the name of the pending workload.
Namespace string `json:"namespace"`
}
type FlavorUsage struct {
// name of the flavor.
Name ResourceFlavorReference `json:"name"`
// resources lists the quota usage for the resources in this flavor.
// +listType=map
// +listMapKey=name
// +kubebuilder:validation:MaxItems=16
Resources []ResourceUsage `json:"resources"`
}
type ResourceUsage struct {
// name of the resource
Name corev1.ResourceName `json:"name"`
// total is the total quantity of used quota, including the amount borrowed
// from the cohort.
Total resource.Quantity `json:"total,omitempty"`
// Borrowed is quantity of quota that is borrowed from the cohort. In other
// words, it's the used quota that is over the nominalQuota.
Borrowed resource.Quantity `json:"borrowed,omitempty"`
}
const (
// ClusterQueueActive indicates that the ClusterQueue can admit new workloads and its quota
// can be borrowed by other ClusterQueues in the same cohort.
ClusterQueueActive string = "Active"
)
type PreemptionPolicy string
const (
PreemptionPolicyNever PreemptionPolicy = "Never"
PreemptionPolicyAny PreemptionPolicy = "Any"
PreemptionPolicyLowerPriority PreemptionPolicy = "LowerPriority"
PreemptionPolicyLowerOrNewerEqualPriority PreemptionPolicy = "LowerOrNewerEqualPriority"
)
type FlavorFungibilityPolicy string
const (
Borrow FlavorFungibilityPolicy = "Borrow"
Preempt FlavorFungibilityPolicy = "Preempt"
TryNextFlavor FlavorFungibilityPolicy = "TryNextFlavor"
)
// FlavorFungibility determines whether a workload should try the next flavor
// before borrowing or preempting in current flavor.
type FlavorFungibility struct {
// whenCanBorrow determines whether a workload should try the next flavor
// before borrowing in current flavor. The possible values are:
//
// - `Borrow` (default): allocate in current flavor if borrowing
// is possible.
// - `TryNextFlavor`: try next flavor even if the current
// flavor has enough resources to borrow.
//
// +kubebuilder:validation:Enum={Borrow,TryNextFlavor}
// +kubebuilder:default="Borrow"
WhenCanBorrow FlavorFungibilityPolicy `json:"whenCanBorrow,omitempty"`
// whenCanPreempt determines whether a workload should try the next flavor
// before borrowing in current flavor. The possible values are:
//
// - `Preempt`: allocate in current flavor if it's possible to preempt some workloads.
// - `TryNextFlavor` (default): try next flavor even if there are enough
// candidates for preemption in the current flavor.
//
// +kubebuilder:validation:Enum={Preempt,TryNextFlavor}
// +kubebuilder:default="TryNextFlavor"
WhenCanPreempt FlavorFungibilityPolicy `json:"whenCanPreempt,omitempty"`
}
// ClusterQueuePreemption contains policies to preempt Workloads from this
// ClusterQueue or the ClusterQueue's cohort.
type ClusterQueuePreemption struct {
// reclaimWithinCohort determines whether a pending Workload can preempt
// Workloads from other ClusterQueues in the cohort that are using more than
// their nominal quota. The possible values are:
//
// - `Never` (default): do not preempt Workloads in the cohort.
// - `LowerPriority`: if the pending Workload fits within the nominal
// quota of its ClusterQueue, only preempt Workloads in the cohort that have
// lower priority than the pending Workload.
// - `Any`: if the pending Workload fits within the nominal quota of its
// ClusterQueue, preempt any Workload in the cohort, irrespective of
// priority.
//
// +kubebuilder:default=Never
// +kubebuilder:validation:Enum=Never;LowerPriority;Any
ReclaimWithinCohort PreemptionPolicy `json:"reclaimWithinCohort,omitempty"`
// borrowWithinCohort provides configuration to allow preemption within
// cohort while borrowing.
BorrowWithinCohort *BorrowWithinCohort `json:"borrowWithinCohort,omitempty"`
// withinClusterQueue determines whether a pending Workload that doesn't fit
// within the nominal quota for its ClusterQueue, can preempt active Workloads in
// the ClusterQueue. The possible values are:
//
// - `Never` (default): do not preempt Workloads in the ClusterQueue.
// - `LowerPriority`: only preempt Workloads in the ClusterQueue that have
// lower priority than the pending Workload.
// - `LowerOrNewerEqualPriority`: only preempt Workloads in the ClusterQueue that
// either have a lower priority than the pending workload or equal priority
// and are newer than the pending workload.
//
// +kubebuilder:default=Never
// +kubebuilder:validation:Enum=Never;LowerPriority;LowerOrNewerEqualPriority
WithinClusterQueue PreemptionPolicy `json:"withinClusterQueue,omitempty"`
}
type BorrowWithinCohortPolicy string
const (
BorrowWithinCohortPolicyNever BorrowWithinCohortPolicy = "Never"
BorrowWithinCohortPolicyLowerPriority BorrowWithinCohortPolicy = "LowerPriority"
)
// BorrowWithinCohort contains configuration which allows to preempt workloads
// within cohort while borrowing.
type BorrowWithinCohort struct {
// policy determines the policy for preemption to reclaim quota within cohort while borrowing.
// Possible values are:
// - `Never` (default): do not allow for preemption, in other
// ClusterQueues within the cohort, for a borrowing workload.
// - `LowerPriority`: allow preemption, in other ClusterQueues
// within the cohort, for a borrowing workload, but only if
// the preempted workloads are of lower priority.
//
// +kubebuilder:default=Never
// +kubebuilder:validation:Enum=Never;LowerPriority
Policy BorrowWithinCohortPolicy `json:"policy,omitempty"`
// maxPriorityThreshold allows to restrict the set of workloads which
// might be preempted by a borrowing workload, to only workloads with
// priority less than or equal to the specified threshold priority.
// When the threshold is not specified, then any workload satisfying the
// policy can be preempted by the borrowing workload.
//
// +optional
MaxPriorityThreshold *int32 `json:"maxPriorityThreshold,omitempty"`
}
// +genclient
// +genclient:nonNamespaced
// +kubebuilder:object:root=true
// +kubebuilder:storageversion
// +kubebuilder:resource:scope=Cluster
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Cohort",JSONPath=".spec.cohort",type=string,description="Cohort that this ClusterQueue belongs to"
// +kubebuilder:printcolumn:name="Strategy",JSONPath=".spec.queueingStrategy",type=string,description="The queueing strategy used to prioritize workloads",priority=1
// +kubebuilder:printcolumn:name="Pending Workloads",JSONPath=".status.pendingWorkloads",type=integer,description="Number of pending workloads"
// +kubebuilder:printcolumn:name="Admitted Workloads",JSONPath=".status.admittedWorkloads",type=integer,description="Number of admitted workloads that haven't finished yet",priority=1
// ClusterQueue is the Schema for the clusterQueue API.
type ClusterQueue struct {
metav1.TypeMeta `json:",inline"`
metav1.ObjectMeta `json:"metadata,omitempty"`
Spec ClusterQueueSpec `json:"spec,omitempty"`
Status ClusterQueueStatus `json:"status,omitempty"`
}
// +kubebuilder:object:root=true
// ClusterQueueList contains a list of ClusterQueue
type ClusterQueueList struct {
metav1.TypeMeta `json:",inline"`
metav1.ListMeta `json:"metadata,omitempty"`
Items []ClusterQueue `json:"items"`
}
func init() {
SchemeBuilder.Register(&ClusterQueue{}, &ClusterQueueList{})
}