deployment.go
package config

import (
	"strings"

	hyperv1 "github.com/openshift/hypershift/api/v1beta1"
	"github.com/openshift/hypershift/support/util"

	appsv1 "k8s.io/api/apps/v1"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/api/resource"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/intstr"
	"k8s.io/apimachinery/pkg/util/sets"
	"k8s.io/utils/pointer"
)
const (
// ManagedByLabel can be used to filter deployments.
ManagedByLabel = "hypershift.openshift.io/managed-by"
	// These are used by tolerations and NodeAffinity rules to tolerate and prefer control plane Nodes.
controlPlaneLabelTolerationKey = "hypershift.openshift.io/control-plane"
clusterLabelTolerationKey = "hypershift.openshift.io/cluster"
// colocationLabelKey is used by PodAffinity to prefer colocating pods that belong to the same hosted cluster.
colocationLabelKey = "hypershift.openshift.io/hosted-control-plane"
	// Soft affinity weight for scheduling onto Nodes dedicated to a specific hosted cluster.
	clusterNodeSchedulingAffinityWeight = 100
	// Soft affinity weight for generic control plane workloads; half the cluster-specific
	// weight, so dedicated Nodes are preferred when both match.
	controlPlaneNodeSchedulingAffinityWeight = clusterNodeSchedulingAffinityWeight / 2
)

type DeploymentConfig struct {
	Replicas                  int                   `json:"replicas"`
	Scheduling                Scheduling            `json:"scheduling"`
	AdditionalLabels          AdditionalLabels      `json:"additionalLabels"`
	AdditionalAnnotations     AdditionalAnnotations `json:"additionalAnnotations"`
	SecurityContexts          SecurityContextSpec   `json:"securityContexts"`
	SetDefaultSecurityContext bool                  `json:"setDefaultSecurityContext"`
	LivenessProbes            LivenessProbes        `json:"livenessProbes"`
	ReadinessProbes           ReadinessProbes       `json:"readinessProbes"`
	Resources                 ResourcesSpec         `json:"resources"`
	DebugDeployments          sets.String           `json:"debugDeployments"`
	ResourceRequestOverrides  ResourceOverrides     `json:"resourceRequestOverrides"`
}
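
// SetContainerResourcesIfPresent records any resource requests or limits already set
// on the given container into the config's Resources spec, keyed by container name.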
func (c *DeploymentConfig) SetContainerResourcesIfPresent(container *corev1.Container) {
resources := container.Resources
if len(resources.Requests) > 0 || len(resources.Limits) > 0 {
if c.Resources != nil {
c.Resources[container.Name] = resources
}
}
}
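
// SetRestartAnnotation propagates the restart-date annotation, if present on the given
// object metadata (e.g. the HostedControlPlane's), into AdditionalAnnotations.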
func (c *DeploymentConfig) SetRestartAnnotation(objectMetadata metav1.ObjectMeta) {
if _, ok := objectMetadata.Annotations[hyperv1.RestartDateAnnotation]; ok {
if c.AdditionalAnnotations == nil {
c.AdditionalAnnotations = make(AdditionalAnnotations)
}
c.AdditionalAnnotations[hyperv1.RestartDateAnnotation] = objectMetadata.Annotations[hyperv1.RestartDateAnnotation]
}
}
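
// SetReleaseImageAnnotation records the release image in AdditionalAnnotations; since
// these annotations are applied to the pod template, a release image change rolls the pods.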
func (c *DeploymentConfig) SetReleaseImageAnnotation(releaseImage string) {
if c.AdditionalAnnotations == nil {
c.AdditionalAnnotations = make(AdditionalAnnotations)
}
c.AdditionalAnnotations[hyperv1.ReleaseImageAnnotation] = releaseImage
}
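
// ApplyTo applies the config to the given Deployment: replicas (or zero for debug
// deployments), rolling update strategy, default security context, labels,
// annotations, probes, and resources.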
func (c *DeploymentConfig) ApplyTo(deployment *appsv1.Deployment) {
if c.DebugDeployments != nil && c.DebugDeployments.Has(deployment.Name) {
deployment.Spec.Replicas = pointer.Int32(0)
} else {
		deployment.Spec.Replicas = pointer.Int32(int32(c.Replicas))
}
	// There are currently two standard cases in HyperShift: HA mode with 3 replicas
	// spread across zones, and non-HA mode with a single replica. With exactly 3 zones
	// available, maxUnavailable must be at least 1 for a rollout to make progress, since
	// each zone already hosts a replica and no surge pod can be placed. In the
	// single-replica case, however, a non-zero maxUnavailable would cause downtime, so
	// the strategy is only adjusted when there is more than one replica.
if c.Replicas > 1 {
maxSurge := intstr.FromInt(0)
maxUnavailable := intstr.FromInt(1)
if deployment.Spec.Strategy.RollingUpdate == nil {
deployment.Spec.Strategy.RollingUpdate = &appsv1.RollingUpdateDeployment{}
}
deployment.Spec.Strategy.RollingUpdate.MaxSurge = &maxSurge
deployment.Spec.Strategy.RollingUpdate.MaxUnavailable = &maxUnavailable
}
// set default security context for pod
if c.SetDefaultSecurityContext {
deployment.Spec.Template.Spec.SecurityContext = &corev1.PodSecurityContext{
RunAsUser: pointer.Int64(DefaultSecurityContextUser),
}
}
// set managed-by label
if deployment.Labels == nil {
deployment.Labels = map[string]string{}
}
deployment.Labels[ManagedByLabel] = "control-plane-operator"
c.Scheduling.ApplyTo(&deployment.Spec.Template.Spec)
c.AdditionalLabels.ApplyTo(&deployment.Spec.Template.ObjectMeta)
c.SecurityContexts.ApplyTo(&deployment.Spec.Template.Spec)
c.LivenessProbes.ApplyTo(&deployment.Spec.Template.Spec)
c.ReadinessProbes.ApplyTo(&deployment.Spec.Template.Spec)
c.Resources.ApplyTo(&deployment.Spec.Template.Spec)
c.ResourceRequestOverrides.ApplyRequestsTo(deployment.Name, &deployment.Spec.Template.Spec)
c.AdditionalAnnotations.ApplyTo(&deployment.Spec.Template.ObjectMeta)
}
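
// ApplyToDaemonSet applies the config to the given DaemonSet. Replicas and update
// strategy do not apply, since a DaemonSet schedules one pod per matching Node.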
func (c *DeploymentConfig) ApplyToDaemonSet(daemonset *appsv1.DaemonSet) {
// replicas is not used for DaemonSets
c.Scheduling.ApplyTo(&daemonset.Spec.Template.Spec)
c.AdditionalLabels.ApplyTo(&daemonset.Spec.Template.ObjectMeta)
c.SecurityContexts.ApplyTo(&daemonset.Spec.Template.Spec)
c.LivenessProbes.ApplyTo(&daemonset.Spec.Template.Spec)
c.ReadinessProbes.ApplyTo(&daemonset.Spec.Template.Spec)
c.Resources.ApplyTo(&daemonset.Spec.Template.Spec)
c.ResourceRequestOverrides.ApplyRequestsTo(daemonset.Name, &daemonset.Spec.Template.Spec)
c.AdditionalAnnotations.ApplyTo(&daemonset.Spec.Template.ObjectMeta)
}
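
// ApplyToStatefulSet applies the config, including replicas, to the given StatefulSet.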
func (c *DeploymentConfig) ApplyToStatefulSet(sts *appsv1.StatefulSet) {
	sts.Spec.Replicas = pointer.Int32(int32(c.Replicas))
c.Scheduling.ApplyTo(&sts.Spec.Template.Spec)
c.AdditionalLabels.ApplyTo(&sts.Spec.Template.ObjectMeta)
c.SecurityContexts.ApplyTo(&sts.Spec.Template.Spec)
c.LivenessProbes.ApplyTo(&sts.Spec.Template.Spec)
c.ReadinessProbes.ApplyTo(&sts.Spec.Template.Spec)
c.Resources.ApplyTo(&sts.Spec.Template.Spec)
c.ResourceRequestOverrides.ApplyRequestsTo(sts.Name, &sts.Spec.Template.Spec)
c.AdditionalAnnotations.ApplyTo(&sts.Spec.Template.ObjectMeta)
}
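
// clusterKey identifies the hosted cluster that owns the given control plane; in
// HyperShift each hosted control plane lives in its own namespace.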
func clusterKey(hcp *hyperv1.HostedControlPlane) string {
return hcp.Namespace
}
func colocationLabelValue(hcp *hyperv1.HostedControlPlane) string {
return clusterKey(hcp)
}
// setMultizoneSpread sets PodAntiAffinity with corev1.LabelTopologyZone as the topology key for a given set of labels.
// This is useful to e.g. ensure pods are spread across availability zones.
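// For example, given labels {"app": "etcd"} (illustrative), the required anti-affinity
// term prevents two pods matching that selector from sharing a topology.kubernetes.io/zone.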
func (c *DeploymentConfig) setMultizoneSpread(labels map[string]string) {
if labels == nil {
return
}
if c.Scheduling.Affinity == nil {
c.Scheduling.Affinity = &corev1.Affinity{}
}
if c.Scheduling.Affinity.PodAntiAffinity == nil {
c.Scheduling.Affinity.PodAntiAffinity = &corev1.PodAntiAffinity{}
}
c.Scheduling.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution =
[]corev1.PodAffinityTerm{
{
TopologyKey: corev1.LabelTopologyZone,
LabelSelector: &metav1.LabelSelector{
MatchLabels: labels,
},
},
}
}
// setColocation sets labels and PodAffinity rules for this deployment so that pods
// of the deployment will prefer to group with pods of the anchor deployment.
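// The anchor is the colocationLabelKey label, whose value is the hosted control
// plane's namespace, so all pods of one hosted cluster prefer the same Nodes
// (topology key kubernetes.io/hostname, weight 100).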
func (c *DeploymentConfig) setColocation(hcp *hyperv1.HostedControlPlane) {
if c.Scheduling.Affinity == nil {
c.Scheduling.Affinity = &corev1.Affinity{}
}
if c.Scheduling.Affinity.PodAffinity == nil {
c.Scheduling.Affinity.PodAffinity = &corev1.PodAffinity{}
}
if c.AdditionalLabels == nil {
c.AdditionalLabels = map[string]string{}
}
c.AdditionalLabels[colocationLabelKey] = colocationLabelValue(hcp)
c.Scheduling.Affinity.PodAffinity.PreferredDuringSchedulingIgnoredDuringExecution = []corev1.WeightedPodAffinityTerm{
{
Weight: 100,
PodAffinityTerm: corev1.PodAffinityTerm{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{
colocationLabelKey: colocationLabelValue(hcp),
},
},
TopologyKey: corev1.LabelHostname,
},
},
}
}
// setControlPlaneIsolation configures tolerations and NodeAffinity rules to prefer Nodes labeled with controlPlaneLabelTolerationKey and clusterLabelTolerationKey.
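// Pods tolerate the generic control-plane NoSchedule taint and the per-cluster
// NoSchedule taint, and prefer cluster-dedicated Nodes (weight 100) over generic
// control plane Nodes (weight 50).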
func (c *DeploymentConfig) setControlPlaneIsolation(hcp *hyperv1.HostedControlPlane) {
c.Scheduling.Tolerations = []corev1.Toleration{
{
Key: controlPlaneLabelTolerationKey,
Operator: corev1.TolerationOpEqual,
Value: "true",
Effect: corev1.TaintEffectNoSchedule,
},
{
Key: clusterLabelTolerationKey,
Operator: corev1.TolerationOpEqual,
Value: clusterKey(hcp),
Effect: corev1.TaintEffectNoSchedule,
},
}
if c.Scheduling.Affinity == nil {
c.Scheduling.Affinity = &corev1.Affinity{}
}
if c.Scheduling.Affinity.NodeAffinity == nil {
c.Scheduling.Affinity.NodeAffinity = &corev1.NodeAffinity{}
}
c.Scheduling.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = []corev1.PreferredSchedulingTerm{
{
Weight: controlPlaneNodeSchedulingAffinityWeight,
Preference: corev1.NodeSelectorTerm{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: controlPlaneLabelTolerationKey,
Operator: corev1.NodeSelectorOpIn,
Values: []string{"true"},
},
},
},
},
{
Weight: clusterNodeSchedulingAffinityWeight,
Preference: corev1.NodeSelectorTerm{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: clusterLabelTolerationKey,
Operator: corev1.NodeSelectorOpIn,
Values: []string{clusterKey(hcp)},
},
},
},
},
}
}
// setNodeSelector sets a nodeSelector passed through the API.
// This is useful to e.g. ensure control plane pods land on management cluster Infra Nodes.
func (c *DeploymentConfig) setNodeSelector(hcp *hyperv1.HostedControlPlane) {
if hcp.Spec.NodeSelector == nil {
return
}
c.Scheduling.NodeSelector = hcp.Spec.NodeSelector
}
func (c *DeploymentConfig) setLocation(hcp *hyperv1.HostedControlPlane, multiZoneSpreadLabels map[string]string) {
c.setNodeSelector(hcp)
c.setControlPlaneIsolation(hcp)
c.setColocation(hcp)
// TODO (alberto): pass labels with deployment hash and set this unconditionally so we don't skew setup.
if c.Replicas > 1 {
c.setMultizoneSpread(multiZoneSpreadLabels)
}
}
func (c *DeploymentConfig) setReplicas(availability hyperv1.AvailabilityPolicy) {
switch availability {
case hyperv1.HighlyAvailable:
c.Replicas = 3
default:
c.Replicas = 1
}
}
// SetDefaults populates opinionated default DeploymentConfig for any Deployment.
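//
// A minimal sketch of a call site (hcp, deployment, and the label set are
// illustrative, not prescribed by this package):
//
//	cfg := DeploymentConfig{}
//	cfg.SetDefaults(hcp, map[string]string{"app": "kube-apiserver"}, nil)
//	cfg.ApplyTo(deployment)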
func (c *DeploymentConfig) SetDefaults(hcp *hyperv1.HostedControlPlane, multiZoneSpreadLabels map[string]string, replicas *int) {
// If no replicas is specified then infer it from the ControllerAvailabilityPolicy.
if replicas == nil {
c.setReplicas(hcp.Spec.ControllerAvailabilityPolicy)
} else {
c.Replicas = *replicas
}
c.DebugDeployments = debugDeployments(hcp)
c.ResourceRequestOverrides = resourceRequestOverrides(hcp)
c.setLocation(hcp, multiZoneSpreadLabels)
	// TODO (alberto): make this private; at the moment it is needed for the konnectivity agent DaemonSet.
c.SetReleaseImageAnnotation(hcp.Spec.ReleaseImage)
}
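
// resourceRequestOverrides collects resource request overrides declared as
// annotations on the HostedControlPlane.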
func resourceRequestOverrides(hcp *hyperv1.HostedControlPlane) ResourceOverrides {
result := ResourceOverrides{}
for key, value := range hcp.Annotations {
if strings.HasPrefix(key, hyperv1.ResourceRequestOverrideAnnotationPrefix+"/") {
result = parseResourceRequestOverrideAnnotation(key, value, result)
}
}
return result
}
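
// parseResourceRequestOverrideAnnotation folds a single override annotation into the
// overrides map. As the parsing below implies, the expected shape is:
//
//	<ResourceRequestOverrideAnnotationPrefix>/<deployment>.<container>: <resource>=<quantity>[,<resource>=<quantity>...]
//
// e.g. a value of "memory=3Gi,cpu=500m" sets those two requests on the named container.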
func parseResourceRequestOverrideAnnotation(key, value string, overrides ResourceOverrides) ResourceOverrides {
	keyParts := strings.SplitN(key, "/", 2)
	deploymentContainerParts := strings.SplitN(keyParts[1], ".", 2)
	if len(deploymentContainerParts) != 2 {
		// Skip keys that do not follow the <deployment>.<container> form
		return overrides
	}
	deployment, container := deploymentContainerParts[0], deploymentContainerParts[1]
	resourceRequests := strings.Split(value, ",")
spec, exists := overrides[deployment]
if !exists {
spec = ResourcesSpec{}
}
requirements, exists := spec[container]
if !exists {
requirements = corev1.ResourceRequirements{}
}
if requirements.Requests == nil {
requirements.Requests = corev1.ResourceList{}
}
	for _, request := range resourceRequests {
		requestParts := strings.SplitN(request, "=", 2)
		if len(requestParts) != 2 {
			// Skip requests that are not of the form <resource>=<quantity>
			continue
		}
		quantity, err := resource.ParseQuantity(requestParts[1])
		if err != nil {
			// Skip this request if the quantity cannot be parsed
			continue
		}
requirements.Requests[corev1.ResourceName(requestParts[0])] = quantity
}
spec[container] = requirements
overrides[deployment] = spec
return overrides
}
// debugDeployments returns the set of deployment names to debug, parsed from the
// util.DebugDeploymentsAnnotation value, indicating those deployments should be
// considered to be in development mode.
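// The annotation value is a comma-separated list of deployment names; ApplyTo scales
// each named deployment to zero replicas (see DebugDeployments).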
func debugDeployments(hc *hyperv1.HostedControlPlane) sets.String {
val, exists := hc.Annotations[util.DebugDeploymentsAnnotation]
if !exists {
return nil
}
names := strings.Split(val, ",")
return sets.NewString(names...)
}