helpers.go
/*
Copyright 2014 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package resource
import (
"fmt"
"math"
"strconv"
"strings"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)
// PodResourcesOptions controls the behavior of PodRequests and PodLimits.
type PodResourcesOptions struct {
// Reuse, if provided, will be reused to accumulate resources and returned by the PodRequests or PodLimits
// functions. All existing values in Reuse will be lost.
Reuse v1.ResourceList
// InPlacePodVerticalScalingEnabled indicates that the in-place pod vertical scaling feature gate is enabled.
InPlacePodVerticalScalingEnabled bool
// ExcludeOverhead controls if pod overhead is excluded from the calculation.
ExcludeOverhead bool
// ContainerFn is called with the effective resources required for each container within the pod.
ContainerFn func(res v1.ResourceList, containerType podutil.ContainerType)
// NonMissingContainerRequests, if provided, will replace any missing container-level requests for the specified
// resources with the given values. If the requests for those resources are explicitly set, even if zero, they will not be modified.
NonMissingContainerRequests v1.ResourceList
}
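// podResourcesOptionsSketch is a hypothetical usage sketch (not part of the
// original file). It shows one way a caller might configure
// PodResourcesOptions: pod overhead excluded, and containers that omit a CPU
// request defaulted to a small illustrative value via
// NonMissingContainerRequests. The chosen quantity is an assumption for the
// example only.
func podResourcesOptionsSketch() PodResourcesOptions {
	return PodResourcesOptions{
		// Skip pod overhead in the sum.
		ExcludeOverhead: true,
		// Treat containers with no CPU request as if they requested 1m.
		NonMissingContainerRequests: v1.ResourceList{
			v1.ResourceCPU: resource.MustParse("1m"),
		},
	}
}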
// PodRequests computes the pod requests per the PodResourcesOptions supplied. If PodResourcesOptions is
// left at its zero value, the requests are returned including pod overhead. The computation is part of the API
// and must be reviewed as an API change.
func PodRequests(pod *v1.Pod, opts PodResourcesOptions) v1.ResourceList {
// attempt to reuse the maps if passed, or allocate otherwise
reqs := reuseOrClearResourceList(opts.Reuse)
var containerStatuses map[string]*v1.ContainerStatus
if opts.InPlacePodVerticalScalingEnabled {
containerStatuses = make(map[string]*v1.ContainerStatus, len(pod.Status.ContainerStatuses))
for i := range pod.Status.ContainerStatuses {
containerStatuses[pod.Status.ContainerStatuses[i].Name] = &pod.Status.ContainerStatuses[i]
}
}
for _, container := range pod.Spec.Containers {
containerReqs := container.Resources.Requests
if opts.InPlacePodVerticalScalingEnabled {
cs, found := containerStatuses[container.Name]
if found {
if pod.Status.Resize == v1.PodResizeStatusInfeasible {
containerReqs = cs.AllocatedResources.DeepCopy()
} else {
containerReqs = max(container.Resources.Requests, cs.AllocatedResources)
}
}
}
if len(opts.NonMissingContainerRequests) > 0 {
containerReqs = applyNonMissing(containerReqs, opts.NonMissingContainerRequests)
}
if opts.ContainerFn != nil {
opts.ContainerFn(containerReqs, podutil.Containers)
}
addResourceList(reqs, containerReqs)
}
restartableInitContainerReqs := v1.ResourceList{}
initContainerReqs := v1.ResourceList{}
// init containers define the minimum of any resource
// Note: In-place resize is not allowed for InitContainers, so no need to check for ResizeStatus value
//
// Let's say `InitContainerUse(i)` is the resource requirements when the i-th
// init container is initializing, then
// `InitContainerUse(i) = sum(Resources of restartable init containers with index < i) + Resources of i-th init container`.
//
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/753-sidecar-containers#exposing-pod-resource-requirements for the detail.
for _, container := range pod.Spec.InitContainers {
containerReqs := container.Resources.Requests
if len(opts.NonMissingContainerRequests) > 0 {
containerReqs = applyNonMissing(containerReqs, opts.NonMissingContainerRequests)
}
if container.RestartPolicy != nil && *container.RestartPolicy == v1.ContainerRestartPolicyAlways {
// and add them to the resulting cumulative container requests
addResourceList(reqs, containerReqs)
// track our cumulative restartable init container resources
addResourceList(restartableInitContainerReqs, containerReqs)
containerReqs = restartableInitContainerReqs
} else {
tmp := v1.ResourceList{}
addResourceList(tmp, containerReqs)
addResourceList(tmp, restartableInitContainerReqs)
containerReqs = tmp
}
if opts.ContainerFn != nil {
opts.ContainerFn(containerReqs, podutil.InitContainers)
}
maxResourceList(initContainerReqs, containerReqs)
}
maxResourceList(reqs, initContainerReqs)
// Add overhead for running a pod to the sum of requests if requested:
if !opts.ExcludeOverhead && pod.Spec.Overhead != nil {
addResourceList(reqs, pod.Spec.Overhead)
}
return reqs
}
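// podRequestsReuseSketch is a hypothetical usage sketch (not part of the
// original file). It computes requests for a batch of pods while recycling a
// single scratch ResourceList through the Reuse option, copying each result
// out before the next call clears the map.
func podRequestsReuseSketch(pods []*v1.Pod) []v1.ResourceList {
	results := make([]v1.ResourceList, 0, len(pods))
	scratch := make(v1.ResourceList, 4)
	for _, pod := range pods {
		// PodRequests clears and returns the scratch map when Reuse is set.
		reqs := PodRequests(pod, PodResourcesOptions{Reuse: scratch})
		// Snapshot the values; the scratch map is overwritten on the next iteration.
		snapshot := make(v1.ResourceList, len(reqs))
		for name, q := range reqs {
			snapshot[name] = q.DeepCopy()
		}
		results = append(results, snapshot)
	}
	return results
}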
// applyNonMissing will return a copy of the given resource list with any missing values replaced by the nonMissing values
func applyNonMissing(reqs v1.ResourceList, nonMissing v1.ResourceList) v1.ResourceList {
cp := v1.ResourceList{}
for k, v := range reqs {
cp[k] = v.DeepCopy()
}
for k, v := range nonMissing {
if _, found := reqs[k]; !found {
rk := cp[k]
rk.Add(v)
cp[k] = rk
}
}
return cp
}
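// applyNonMissingSketch is a hypothetical worked example (not part of the
// original file): a container that sets only CPU keeps that value, while the
// missing memory request is filled in from the defaults.
func applyNonMissingSketch() v1.ResourceList {
	reqs := v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")}
	defaults := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("1m"),
		v1.ResourceMemory: resource.MustParse("64Mi"),
	}
	// Result: cpu stays 100m (explicitly set), memory becomes 64Mi (was missing).
	return applyNonMissing(reqs, defaults)
}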
// PodLimits computes the pod limits per the PodResourcesOptions supplied. If PodResourcesOptions is
// left at its zero value, the limits are returned including pod overhead for any non-zero limits. The computation
// is part of the API and must be reviewed as an API change.
func PodLimits(pod *v1.Pod, opts PodResourcesOptions) v1.ResourceList {
// attempt to reuse the maps if passed, or allocate otherwise
limits := reuseOrClearResourceList(opts.Reuse)
for _, container := range pod.Spec.Containers {
if opts.ContainerFn != nil {
opts.ContainerFn(container.Resources.Limits, podutil.Containers)
}
addResourceList(limits, container.Resources.Limits)
}
restartableInitContainerLimits := v1.ResourceList{}
initContainerLimits := v1.ResourceList{}
// init containers define the minimum of any resource
//
// Let's say `InitContainerUse(i)` is the resource requirements when the i-th
// init container is initializing, then
// `InitContainerUse(i) = sum(Resources of restartable init containers with index < i) + Resources of i-th init container`.
//
// See https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/753-sidecar-containers#exposing-pod-resource-requirements for the detail.
for _, container := range pod.Spec.InitContainers {
containerLimits := container.Resources.Limits
// Is the init container marked as a restartable init container?
if container.RestartPolicy != nil && *container.RestartPolicy == v1.ContainerRestartPolicyAlways {
addResourceList(limits, containerLimits)
// track our cumulative restartable init container resources
addResourceList(restartableInitContainerLimits, containerLimits)
containerLimits = restartableInitContainerLimits
} else {
tmp := v1.ResourceList{}
addResourceList(tmp, containerLimits)
addResourceList(tmp, restartableInitContainerLimits)
containerLimits = tmp
}
if opts.ContainerFn != nil {
opts.ContainerFn(containerLimits, podutil.InitContainers)
}
maxResourceList(initContainerLimits, containerLimits)
}
maxResourceList(limits, initContainerLimits)
// Add overhead to non-zero limits if requested:
if !opts.ExcludeOverhead && pod.Spec.Overhead != nil {
for name, quantity := range pod.Spec.Overhead {
if value, ok := limits[name]; ok && !value.IsZero() {
value.Add(quantity)
limits[name] = value
}
}
}
return limits
}
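// podLimitsSketch is a hypothetical usage sketch (not part of the original
// file). It computes the pod-level limits and, via ContainerFn, prints each
// container's effective limits as they are visited.
func podLimitsSketch(pod *v1.Pod) v1.ResourceList {
	return PodLimits(pod, PodResourcesOptions{
		ContainerFn: func(res v1.ResourceList, containerType podutil.ContainerType) {
			fmt.Printf("container type %v limits: %v\n", containerType, res)
		},
	})
}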
// addResourceList adds the resources in newList to list.
func addResourceList(list, newList v1.ResourceList) {
for name, quantity := range newList {
if value, ok := list[name]; !ok {
list[name] = quantity.DeepCopy()
} else {
value.Add(quantity)
list[name] = value
}
}
}
// maxResourceList sets list to the greater of list/newList for every resource in newList
func maxResourceList(list, newList v1.ResourceList) {
for name, quantity := range newList {
if value, ok := list[name]; !ok || quantity.Cmp(value) > 0 {
list[name] = quantity.DeepCopy()
}
}
}
// max returns the result of max(a, b) for each named resource and is only used if we can't
// accumulate into an existing resource list
func max(a v1.ResourceList, b v1.ResourceList) v1.ResourceList {
result := v1.ResourceList{}
for key, value := range a {
if other, found := b[key]; found {
if value.Cmp(other) <= 0 {
result[key] = other.DeepCopy()
continue
}
}
result[key] = value.DeepCopy()
}
for key, value := range b {
if _, found := result[key]; !found {
result[key] = value.DeepCopy()
}
}
return result
}
// reuseOrClearResourceList is a helper for avoiding excessive allocations of
// resource lists within the inner loop of resource calculations.
func reuseOrClearResourceList(reuse v1.ResourceList) v1.ResourceList {
if reuse == nil {
return make(v1.ResourceList, 4)
}
for k := range reuse {
delete(reuse, k)
}
return reuse
}
// GetResourceRequestQuantity finds and returns the request quantity for a specific resource.
func GetResourceRequestQuantity(pod *v1.Pod, resourceName v1.ResourceName) resource.Quantity {
requestQuantity := resource.Quantity{}
switch resourceName {
case v1.ResourceCPU:
requestQuantity = resource.Quantity{Format: resource.DecimalSI}
case v1.ResourceMemory, v1.ResourceStorage, v1.ResourceEphemeralStorage:
requestQuantity = resource.Quantity{Format: resource.BinarySI}
default:
requestQuantity = resource.Quantity{Format: resource.DecimalSI}
}
for _, container := range pod.Spec.Containers {
if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
requestQuantity.Add(rQuantity)
}
}
for _, container := range pod.Spec.InitContainers {
if rQuantity, ok := container.Resources.Requests[resourceName]; ok {
if requestQuantity.Cmp(rQuantity) < 0 {
requestQuantity = rQuantity.DeepCopy()
}
}
}
// Add overhead for running a pod
// to the total requests if the resource total is non-zero
if pod.Spec.Overhead != nil {
if podOverhead, ok := pod.Spec.Overhead[resourceName]; ok && !requestQuantity.IsZero() {
requestQuantity.Add(podOverhead)
}
}
return requestQuantity
}
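// getResourceRequestQuantitySketch is a hypothetical worked example (not part
// of the original file). Two app containers request 200m and 300m of CPU
// (summing to 500m), an init container requests 700m (the max wins, giving
// 700m), and 100m of pod overhead is added on top, for a total of 800m.
func getResourceRequestQuantitySketch() resource.Quantity {
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			Containers: []v1.Container{
				{Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("200m")}}},
				{Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("300m")}}},
			},
			InitContainers: []v1.Container{
				{Resources: v1.ResourceRequirements{Requests: v1.ResourceList{v1.ResourceCPU: resource.MustParse("700m")}}},
			},
			Overhead: v1.ResourceList{v1.ResourceCPU: resource.MustParse("100m")},
		},
	}
	return GetResourceRequestQuantity(pod, v1.ResourceCPU) // 800m
}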
// GetResourceRequest finds and returns the request value for a specific resource.
func GetResourceRequest(pod *v1.Pod, resource v1.ResourceName) int64 {
if resource == v1.ResourcePods {
return 1
}
requestQuantity := GetResourceRequestQuantity(pod, resource)
if resource == v1.ResourceCPU {
return requestQuantity.MilliValue()
}
return requestQuantity.Value()
}
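// getResourceRequestSketch is a hypothetical usage sketch (not part of the
// original file): CPU requests come back in millicores, everything else in
// whole units (bytes for memory).
func getResourceRequestSketch(pod *v1.Pod) (cpuMilli, memBytes int64) {
	return GetResourceRequest(pod, v1.ResourceCPU), GetResourceRequest(pod, v1.ResourceMemory)
}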
// ExtractResourceValueByContainerName extracts the value of a resource
// by providing container name
func ExtractResourceValueByContainerName(fs *v1.ResourceFieldSelector, pod *v1.Pod, containerName string) (string, error) {
container, err := findContainerInPod(pod, containerName)
if err != nil {
return "", err
}
return ExtractContainerResourceValue(fs, container)
}
// ExtractResourceValueByContainerNameAndNodeAllocatable extracts the value of a resource
// by providing container name and node allocatable
func ExtractResourceValueByContainerNameAndNodeAllocatable(fs *v1.ResourceFieldSelector, pod *v1.Pod, containerName string, nodeAllocatable v1.ResourceList) (string, error) {
realContainer, err := findContainerInPod(pod, containerName)
if err != nil {
return "", err
}
container := realContainer.DeepCopy()
MergeContainerResourceLimits(container, nodeAllocatable)
return ExtractContainerResourceValue(fs, container)
}
// ExtractContainerResourceValue extracts the value of a resource
// in an already known container
func ExtractContainerResourceValue(fs *v1.ResourceFieldSelector, container *v1.Container) (string, error) {
divisor := resource.Quantity{}
if divisor.Cmp(fs.Divisor) == 0 {
divisor = resource.MustParse("1")
} else {
divisor = fs.Divisor
}
switch fs.Resource {
case "limits.cpu":
return convertResourceCPUToString(container.Resources.Limits.Cpu(), divisor)
case "limits.memory":
return convertResourceMemoryToString(container.Resources.Limits.Memory(), divisor)
case "limits.ephemeral-storage":
return convertResourceEphemeralStorageToString(container.Resources.Limits.StorageEphemeral(), divisor)
case "requests.cpu":
return convertResourceCPUToString(container.Resources.Requests.Cpu(), divisor)
case "requests.memory":
return convertResourceMemoryToString(container.Resources.Requests.Memory(), divisor)
case "requests.ephemeral-storage":
return convertResourceEphemeralStorageToString(container.Resources.Requests.StorageEphemeral(), divisor)
}
// handle extended standard resources with dynamic names
// example: requests.hugepages-<pageSize> or limits.hugepages-<pageSize>
if strings.HasPrefix(fs.Resource, "requests.") {
resourceName := v1.ResourceName(strings.TrimPrefix(fs.Resource, "requests."))
if IsHugePageResourceName(resourceName) {
return convertResourceHugePagesToString(container.Resources.Requests.Name(resourceName, resource.BinarySI), divisor)
}
}
if strings.HasPrefix(fs.Resource, "limits.") {
resourceName := v1.ResourceName(strings.TrimPrefix(fs.Resource, "limits."))
if IsHugePageResourceName(resourceName) {
return convertResourceHugePagesToString(container.Resources.Limits.Name(resourceName, resource.BinarySI), divisor)
}
}
return "", fmt.Errorf("unsupported container resource : %v", fs.Resource)
}
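// extractContainerResourceValueSketch is a hypothetical usage sketch (not part
// of the original file). It mirrors how the downward API resolves a
// resourceFieldRef: a container with a 256Mi memory limit and a divisor of
// 1Mi yields the string "256".
func extractContainerResourceValueSketch() (string, error) {
	container := &v1.Container{
		Resources: v1.ResourceRequirements{
			Limits: v1.ResourceList{v1.ResourceMemory: resource.MustParse("256Mi")},
		},
	}
	fs := &v1.ResourceFieldSelector{
		Resource: "limits.memory",
		Divisor:  resource.MustParse("1Mi"),
	}
	return ExtractContainerResourceValue(fs, container) // "256", nil
}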
// convertResourceCPUToString converts cpu value to the format of divisor and returns
// ceiling of the value.
func convertResourceCPUToString(cpu *resource.Quantity, divisor resource.Quantity) (string, error) {
c := int64(math.Ceil(float64(cpu.MilliValue()) / float64(divisor.MilliValue())))
return strconv.FormatInt(c, 10), nil
}
// convertResourceMemoryToString converts memory value to the format of divisor and returns
// ceiling of the value.
func convertResourceMemoryToString(memory *resource.Quantity, divisor resource.Quantity) (string, error) {
m := int64(math.Ceil(float64(memory.Value()) / float64(divisor.Value())))
return strconv.FormatInt(m, 10), nil
}
// convertResourceHugePagesToString converts hugepages value to the format of divisor and returns
// ceiling of the value.
func convertResourceHugePagesToString(hugePages *resource.Quantity, divisor resource.Quantity) (string, error) {
m := int64(math.Ceil(float64(hugePages.Value()) / float64(divisor.Value())))
return strconv.FormatInt(m, 10), nil
}
// convertResourceEphemeralStorageToString converts ephemeral storage value to the format of divisor and returns
// ceiling of the value.
func convertResourceEphemeralStorageToString(ephemeralStorage *resource.Quantity, divisor resource.Quantity) (string, error) {
m := int64(math.Ceil(float64(ephemeralStorage.Value()) / float64(divisor.Value())))
return strconv.FormatInt(m, 10), nil
}
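// convertResourceCPUCeilingSketch is a hypothetical worked example (not part
// of the original file): 500m of CPU divided by a divisor of one full core is
// rounded up, so the downward API reports "1" rather than "0".
func convertResourceCPUCeilingSketch() (string, error) {
	cpu := resource.MustParse("500m")
	divisor := resource.MustParse("1")
	return convertResourceCPUToString(&cpu, divisor) // "1", nil
}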
// findContainerInPod finds a container by its name in the provided pod
func findContainerInPod(pod *v1.Pod, containerName string) (*v1.Container, error) {
for _, container := range pod.Spec.Containers {
if container.Name == containerName {
return &container, nil
}
}
for _, container := range pod.Spec.InitContainers {
if container.Name == containerName {
return &container, nil
}
}
return nil, fmt.Errorf("container %s not found", containerName)
}
// MergeContainerResourceLimits checks whether a limit is set for the
// container and, if not, fills in the limit from the passed resource list.
func MergeContainerResourceLimits(container *v1.Container,
allocatable v1.ResourceList) {
if container.Resources.Limits == nil {
container.Resources.Limits = make(v1.ResourceList)
}
// NOTE: we exclude hugepages-* resources because hugepages are never overcommitted.
// This means that the container always has a limit specified.
for _, resource := range []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourceEphemeralStorage} {
if quantity, exists := container.Resources.Limits[resource]; !exists || quantity.IsZero() {
if cap, exists := allocatable[resource]; exists {
container.Resources.Limits[resource] = cap.DeepCopy()
}
}
}
}
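// mergeContainerResourceLimitsSketch is a hypothetical usage sketch (not part
// of the original file). A container without a memory limit picks up the
// node's allocatable memory as its effective limit; its explicit CPU limit is
// left untouched.
func mergeContainerResourceLimitsSketch() v1.ResourceList {
	container := &v1.Container{
		Resources: v1.ResourceRequirements{
			Limits: v1.ResourceList{v1.ResourceCPU: resource.MustParse("500m")},
		},
	}
	allocatable := v1.ResourceList{
		v1.ResourceCPU:    resource.MustParse("4"),
		v1.ResourceMemory: resource.MustParse("8Gi"),
	}
	MergeContainerResourceLimits(container, allocatable)
	// Result: cpu stays 500m, memory becomes 8Gi, ephemeral-storage stays unset
	// because it is absent from allocatable in this example.
	return container.Resources.Limits
}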
// IsHugePageResourceName returns true if the resource name has the huge page
// resource prefix.
func IsHugePageResourceName(name v1.ResourceName) bool {
return strings.HasPrefix(string(name), v1.ResourceHugePagesPrefix)
}