generated from kubernetes/kubernetes-template-project
-
Notifications
You must be signed in to change notification settings - Fork 33
/
jobset_types.go
257 lines (217 loc) · 10.5 KB
/
jobset_types.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// +k8s:openapi-gen=true
package v1alpha2
import (
batchv1 "k8s.io/api/batch/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
const (
	// Keys in the jobset.sigs.k8s.io namespace. This file only declares them;
	// the code that attaches them to objects is not part of this file.
	JobSetNameKey         string = "jobset.sigs.k8s.io/jobset-name"
	ReplicatedJobReplicas string = "jobset.sigs.k8s.io/replicatedjob-replicas"
	ReplicatedJobNameKey  string = "jobset.sigs.k8s.io/replicatedjob-name"
	JobIndexKey           string = "jobset.sigs.k8s.io/job-index"
	JobKey                string = "jobset.sigs.k8s.io/job-key"

	// JobNameKey is the only key in this group without a domain prefix.
	// TODO(#26): Migrate to the fully qualified label name.
	JobNameKey string = "job-name"

	// ExclusiveKey is an annotation accepted on the JobSet or on a ReplicatedJob template.
	// Set on the JobSet, all child jobs from all ReplicatedJobs are scheduled using
	// exclusive job placement per topology group (defined as the label value).
	// Set on a ReplicatedJob, only the child jobs of the targeted ReplicatedJobs are
	// scheduled using exclusive job placement per topology group.
	ExclusiveKey string = "alpha.jobset.sigs.k8s.io/exclusive-topology"

	// NodeSelectorStrategyKey is a flag annotation: its presence matters, its value does not.
	// When it is set, the JobSet controller automatically injects nodeSelectors for the
	// JobSetNameKey label to ensure exclusive job placement per topology, instead of
	// injecting pod affinity/anti-affinities for this.
	// The user must separately add the JobSet name node label to the desired topologies.
	NodeSelectorStrategyKey string = "alpha.jobset.sigs.k8s.io/node-selector"

	// NOTE(review): the consumers of the two keys below are not visible in this
	// file. Presumably they are a node label key and a taint key used by the
	// controller; confirm before relying on that.
	NamespacedJobKey   string = "alpha.jobset.sigs.k8s.io/namespaced-job"
	NoScheduleTaintKey string = "alpha.jobset.sigs.k8s.io/no-schedule"

	// JobSetControllerName is the value of the managedBy field that is reserved
	// for the built-in JobSet controller.
	JobSetControllerName = "jobset.sigs.k8s.io/jobset-controller"
)
// JobSetConditionType is the string type used for the names of the built-in
// JobSet conditions declared below.
type JobSetConditionType string

// Built-in condition types of a JobSet.
const (
	// JobSetCompleted means the JobSet has completed its execution.
	JobSetCompleted JobSetConditionType = "Completed"

	// JobSetFailed means the JobSet has failed its execution.
	JobSetFailed JobSetConditionType = "Failed"

	// JobSetSuspended means the JobSet is suspended.
	JobSetSuspended JobSetConditionType = "Suspended"

	// JobSetStartupPolicyInProgress means the StartupPolicy is in progress.
	JobSetStartupPolicyInProgress JobSetConditionType = "StartupPolicyInProgress"

	// JobSetStartupPolicyCompleted means the StartupPolicy has completed.
	JobSetStartupPolicyCompleted JobSetConditionType = "StartupPolicyCompleted"
)
// JobSetSpec defines the desired state of JobSet.
type JobSetSpec struct {
	// ReplicatedJobs is the group of jobs that will form the set.
	// Entries are keyed by their name.
	// +listType=map
	// +listMapKey=name
	ReplicatedJobs []ReplicatedJob `json:"replicatedJobs,omitempty"`

	// Network defines the networking options for the jobset.
	// The value is immutable.
	// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="Value is immutable"
	// +optional
	Network *Network `json:"network,omitempty"`

	// SuccessPolicy configures when to declare the JobSet as succeeded.
	// The JobSet is always declared succeeded if all jobs in the set
	// finished with status complete.
	// The value is immutable.
	// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="Value is immutable"
	SuccessPolicy *SuccessPolicy `json:"successPolicy,omitempty"`

	// FailurePolicy, if set, configures when to declare the JobSet as failed.
	// The JobSet is always declared failed if any job in the set
	// finished with status failed.
	// The value is immutable.
	// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="Value is immutable"
	FailurePolicy *FailurePolicy `json:"failurePolicy,omitempty"`

	// StartupPolicy, if set, configures in what order jobs must be started.
	// The value is immutable.
	// +kubebuilder:validation:XValidation:rule="self == oldSelf",message="Value is immutable"
	StartupPolicy *StartupPolicy `json:"startupPolicy,omitempty"`

	// Suspend suspends all running child Jobs when set to true.
	Suspend *bool `json:"suspend,omitempty"`

	// ManagedBy is used to indicate the controller or entity that manages a JobSet.
	ManagedBy *string `json:"managedBy,omitempty"`

	// TTLSecondsAfterFinished limits the lifetime of a JobSet that has finished
	// execution (either Complete or Failed).
	// If this field is set, the JobSet becomes eligible for automatic deletion
	// TTLSecondsAfterFinished seconds after it finishes; its lifecycle guarantees
	// (e.g. finalizers) are honored while it is being deleted.
	// If this field is unset, the JobSet won't be automatically deleted.
	// If this field is set to zero, the JobSet becomes eligible to be deleted
	// immediately after it finishes.
	// +kubebuilder:validation:Minimum=0
	// +optional
	TTLSecondsAfterFinished *int32 `json:"ttlSecondsAfterFinished,omitempty"`
}
// JobSetStatus defines the observed state of JobSet.
type JobSetStatus struct {
	// Conditions holds the conditions recorded for the JobSet, keyed by condition type.
	// +optional
	// +listType=map
	// +listMapKey=type
	Conditions []metav1.Condition `json:"conditions,omitempty"`

	// Restarts tracks the number of times the JobSet has restarted
	// (i.e. recreated in case of RecreateAll policy).
	Restarts int32 `json:"restarts,omitempty"`

	// ReplicatedJobsStatus tracks, for each replicatedJob, the number of its
	// child Jobs that are ready (along with the other counters carried by
	// ReplicatedJobStatus). Entries are keyed by replicatedJob name.
	// +optional
	// +listType=map
	// +listMapKey=name
	ReplicatedJobsStatus []ReplicatedJobStatus `json:"replicatedJobsStatus,omitempty"`
}
// ReplicatedJobStatus defines the observed readiness of a single ReplicatedJob,
// expressed as counts of its child Jobs in each state.
type ReplicatedJobStatus struct {
	// Name of the ReplicatedJob.
	Name string `json:"name"`

	// Ready is the number of child Jobs whose ready pods plus completed pods
	// are greater than or equal to the total expected pod count for the Job
	// (i.e., the minimum of job.spec.parallelism and job.spec.completions).
	Ready int32 `json:"ready"`

	// Succeeded is the number of successfully completed child Jobs.
	Succeeded int32 `json:"succeeded"`

	// Failed is the number of failed child Jobs.
	Failed int32 `json:"failed"`

	// Active is the number of child Jobs that have at least 1 pod in a running
	// or pending state and are not marked for deletion.
	Active int32 `json:"active"`

	// Suspended is the number of child Jobs which are in a suspended state.
	Suspended int32 `json:"suspended"`
}
// +genclient
// +k8s:openapi-gen=true
// +kubebuilder:object:root=true
// +kubebuilder:subresource:status
// +kubebuilder:printcolumn:name="Restarts",JSONPath=".status.restarts",type=string,description="Number of restarts"
// +kubebuilder:printcolumn:name="Completed",type="string",priority=0,JSONPath=".status.conditions[?(@.type==\"Completed\")].status"
// +kubebuilder:printcolumn:name="Suspended",type="string",JSONPath=".spec.suspend",description="JobSet suspended"
// +kubebuilder:printcolumn:name="Age",JSONPath=".metadata.creationTimestamp",type=date,description="Time this JobSet was created"
// JobSet is the Schema for the jobsets API.
type JobSet struct {
	metav1.TypeMeta   `json:",inline"`
	metav1.ObjectMeta `json:"metadata,omitempty"`

	// Spec is the desired state of the JobSet.
	Spec JobSetSpec `json:"spec,omitempty"`

	// Status is the observed state of the JobSet.
	Status JobSetStatus `json:"status,omitempty"`
}
// +kubebuilder:object:root=true
// JobSetList contains a list of JobSet.
type JobSetList struct {
	metav1.TypeMeta `json:",inline"`
	metav1.ListMeta `json:"metadata,omitempty"`

	// Items holds the JobSet objects in the list.
	Items []JobSet `json:"items"`
}
// ReplicatedJob pairs a named Job template with the number of Jobs to create
// from that template.
type ReplicatedJob struct {
	// Name is the name of the entry and will be used as a suffix
	// for the Job name.
	Name string `json:"name"`

	// Template defines the template of the Job that will be created.
	Template batchv1.JobTemplateSpec `json:"template"`

	// Replicas is the number of jobs that will be created from this ReplicatedJob's template.
	// Job names will be in the format: <jobSet.name>-<spec.replicatedJob.name>-<job-index>
	// +kubebuilder:default=1
	Replicas int32 `json:"replicas,omitempty"`
}
// Network holds the networking options of a JobSet.
type Network struct {
	// EnableDNSHostnames allows pods to be reached via their hostnames.
	// Pods will be reachable using the fully qualified pod hostname:
	// <jobSet.name>-<spec.replicatedJob.name>-<job-index>-<pod-index>.<subdomain>
	// +optional
	EnableDNSHostnames *bool `json:"enableDNSHostnames,omitempty"`

	// Subdomain is an explicit choice for a network subdomain name.
	// When set, any replicated job in the set is added to this network.
	// Defaults to <jobSet.name> if not set.
	// +optional
	Subdomain string `json:"subdomain,omitempty"`
}
// Operator defines the target of a policy: whether it applies to all of the
// selected jobs or to any single one of them. In this file it is used by
// SuccessPolicy.
type Operator string

const (
	// OperatorAll applies to all of the selected jobs.
	OperatorAll Operator = "All"

	// OperatorAny applies to any single one of the selected jobs.
	OperatorAny Operator = "Any"
)
// FailurePolicy bounds how many times a JobSet may be restarted.
type FailurePolicy struct {
	// MaxRestarts defines the limit on the number of JobSet restarts.
	// A restart is achieved by recreating all active child jobs.
	MaxRestarts int32 `json:"maxRestarts,omitempty"`
}
// SuccessPolicy describes which jobs must succeed for the JobSet to be
// considered successful.
type SuccessPolicy struct {
	// Operator determines whether All or Any of the selected jobs should
	// succeed to consider the JobSet successful.
	// +kubebuilder:validation:Enum=All;Any
	Operator Operator `json:"operator"`

	// TargetReplicatedJobs are the names of the replicated jobs the operator will apply to.
	// A null or empty list will apply to all replicatedJobs.
	// +optional
	// +listType=atomic
	TargetReplicatedJobs []string `json:"targetReplicatedJobs,omitempty"`
}
// StartupPolicyOptions is the string type of the startup orderings declared below.
type StartupPolicyOptions string

const (
	// AnyOrder means that we will start the replicated jobs
	// without any specific order. This is the default setting.
	AnyOrder StartupPolicyOptions = "AnyOrder"

	// InOrder starts the replicated jobs in the order
	// that they are listed.
	InOrder StartupPolicyOptions = "InOrder"
)
// StartupPolicy configures the order in which the ReplicatedJobs of a JobSet
// are started.
type StartupPolicy struct {
	// StartupPolicyOrder determines the startup order of the ReplicatedJobs.
	// AnyOrder means to start replicated jobs in any order.
	// InOrder means to start them as they are listed in the JobSet; a ReplicatedJob
	// is started only when all the jobs of the previous one are ready.
	// +kubebuilder:validation:Enum=AnyOrder;InOrder
	StartupPolicyOrder StartupPolicyOptions `json:"startupPolicyOrder"`
}
// init registers the JobSet and JobSetList types with the package's SchemeBuilder.
func init() {
	SchemeBuilder.Register(
		&JobSet{},
		&JobSetList{},
	)
}