-
Notifications
You must be signed in to change notification settings - Fork 39.4k
/
resources.go
201 lines (180 loc) · 6.77 KB
/
resources.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package resources provides a metrics collector that reports the
// resource consumption (requests and limits) of the pods in the cluster
// as the scheduler and kubelet would interpret it.
package resources
import (
"net/http"
"strconv"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels"
corelisters "k8s.io/client-go/listers/core/v1"
"k8s.io/component-base/metrics"
v1resource "k8s.io/kubernetes/pkg/api/v1/resource"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)
// resourceLifecycleDescriptors groups the metric descriptors for one family of
// pod resource series. podResourceDesc uses one instance for requests and one
// for limits.
type resourceLifecycleDescriptors struct {
// total is the descriptor of the series emitted for each resource of a pod.
total *metrics.Desc
}
// Describe sends the descriptor held by d on ch.
func (d resourceLifecycleDescriptors) Describe(ch chan<- *metrics.Desc) {
ch <- d.total
}
// resourceMetricsDescriptors pairs the descriptors of the request series with
// the descriptors of the limit series.
type resourceMetricsDescriptors struct {
	requests resourceLifecycleDescriptors
	limits   resourceLifecycleDescriptors
}

// Describe sends the request descriptors and then the limit descriptors on ch.
func (d resourceMetricsDescriptors) Describe(ch chan<- *metrics.Desc) {
	for _, group := range [...]resourceLifecycleDescriptors{d.requests, d.limits} {
		group.Describe(ch)
	}
}
// podResourceDesc holds the descriptors of the two metric families reported by
// this package: kube_pod_resource_request and kube_pod_resource_limit. Both
// families carry the same variable labels (namespace, pod, node, scheduler,
// priority, resource, unit) and are declared with ALPHA stability.
// NOTE(review): the trailing nil and "" arguments are presumably the constant
// labels and the deprecated version - confirm against metrics.NewDesc.
var podResourceDesc = resourceMetricsDescriptors{
requests: resourceLifecycleDescriptors{
total: metrics.NewDesc("kube_pod_resource_request",
"Resources requested by workloads on the cluster, broken down by pod. This shows the resource usage the scheduler and kubelet expect per pod for resources along with the unit for the resource if any.",
[]string{"namespace", "pod", "node", "scheduler", "priority", "resource", "unit"},
nil,
metrics.ALPHA,
""),
},
limits: resourceLifecycleDescriptors{
total: metrics.NewDesc("kube_pod_resource_limit",
"Resources limit for workloads on the cluster, broken down by pod. This shows the resource usage the scheduler and kubelet expect per pod for resources along with the unit for the resource if any.",
[]string{"namespace", "pod", "node", "scheduler", "priority", "resource", "unit"},
nil,
metrics.ALPHA,
""),
},
}
// Handler returns an http.Handler that reports the pod resource metrics in the
// prometheus format. The handler is backed by a fresh registry on which only
// the collector built from podLister is registered, so no other metrics are
// included in its output.
func Handler(podLister corelisters.PodLister) http.Handler {
	reg := metrics.NewKubeRegistry()
	reg.CustomMustRegister(NewPodResourcesMetricsCollector(podLister))
	return metrics.HandlerWithReset(reg, metrics.HandlerOpts{})
}
// Compile-time assertion that *podResourceCollector implements metrics.StableCollector.
var _ metrics.StableCollector = (*podResourceCollector)(nil)
// NewPodResourcesMetricsCollector returns a collector that reports the resource
// requests and limits of the pods provided by podLister. The cardinality of the
// emitted metrics is O(pods): every non-terminal pod contributes one request
// series and one limit series per resource with a non-zero quantity, labelled
// by namespace, pod, node, scheduler, priority, resource and unit. The returned
// collector still has to be registered with a registry by the caller.
func NewPodResourcesMetricsCollector(podLister corelisters.PodLister) metrics.StableCollector {
	collector := new(podResourceCollector)
	collector.lister = podLister
	return collector
}
// podResourceCollector is the metrics.StableCollector implementation that
// reports per-pod resource requests and limits.
type podResourceCollector struct {
metrics.BaseStableCollector
// lister supplies the pods that are listed on every collection.
lister corelisters.PodLister
}
// DescribeWithStability sends the request descriptor followed by the limit
// descriptor from podResourceDesc on ch.
func (c *podResourceCollector) DescribeWithStability(ch chan<- *metrics.Desc) {
	ch <- podResourceDesc.requests.total
	ch <- podResourceDesc.limits.total
}
// CollectWithStability lists every pod known to the lister and sends one
// request series and one limit series on ch for each resource of each
// non-terminal pod. Quantities that are zero are dropped by
// recordMetricWithUnit, so they never produce a series.
func (c *podResourceCollector) CollectWithStability(ch chan<- metrics.Metric) {
	pods, err := c.lister.List(labels.Everything())
	if err != nil {
		// This method has no way to return an error to its caller; a failed
		// list results in a collection without any pod resource series.
		return
	}

	// The two lists are handed to every podRequestsAndLimitsByLifecycle call
	// so the per-pod computation can reuse them instead of allocating.
	reuseReqs, reuseLimits := make(v1.ResourceList, 4), make(v1.ResourceList, 4)
	for _, p := range pods {
		reqs, limits, terminal := podRequestsAndLimitsByLifecycle(p, reuseReqs, reuseLimits)
		if terminal {
			// terminal pods are excluded from resource usage calculations
			continue
		}

		// The priority label is identical for every series of a pod, so it is
		// formatted once per pod rather than once per resource.
		var priority string
		if p.Spec.Priority != nil {
			priority = strconv.FormatInt(int64(*p.Spec.Priority), 10)
		}

		for _, t := range []struct {
			desc  resourceLifecycleDescriptors
			total v1.ResourceList
		}{
			{desc: podResourceDesc.requests, total: reqs},
			{desc: podResourceDesc.limits, total: limits},
		} {
			for resourceName, val := range t.total {
				recordMetricWithUnit(ch, t.desc.total, p.Namespace, p.Name, p.Spec.NodeName, p.Spec.SchedulerName, priority, resourceName, resourceUnitName(resourceName), val)
			}
		}
	}
}

// resourceUnitName returns the value of the "unit" label for resourceName:
// "cores" for CPU, "bytes" for memory, storage, ephemeral storage and huge
// pages, "integer" for attachable volume resources, and "" for anything else.
func resourceUnitName(resourceName v1.ResourceName) string {
	switch resourceName {
	case v1.ResourceCPU:
		return "cores"
	case v1.ResourceMemory, v1.ResourceStorage, v1.ResourceEphemeralStorage:
		return "bytes"
	}
	switch {
	case v1helper.IsHugePageResourceName(resourceName):
		return "bytes"
	case v1helper.IsAttachableVolumeResourceName(resourceName):
		return "integer"
	}
	return ""
}
// recordMetricWithUnit sends a gauge metric for a single resource quantity on
// ch, described by desc and labelled with the given namespace, pod name, node
// name, scheduler name, priority, resource name and unit. The metric value is
// val converted to an approximate float64. A zero val produces no metric.
func recordMetricWithUnit(
	ch chan<- metrics.Metric,
	desc *metrics.Desc,
	namespace, name, nodeName, schedulerName, priority string,
	resourceName v1.ResourceName,
	unit string,
	val resource.Quantity,
) {
	if !val.IsZero() {
		// Order must match the variable labels declared on the descriptor.
		labelValues := []string{namespace, name, nodeName, schedulerName, priority, string(resourceName), unit}
		ch <- metrics.NewLazyConstMetric(desc, metrics.GaugeValue, val.AsApproximateFloat64(), labelValues...)
	}
}
// podRequestsAndLimitsByLifecycle reports whether the pod is terminal and, when
// it is not, the resource requests and limits of the pod as computed by
// v1resource.PodRequestsAndLimitsReuse.
//
// A pod is treated as terminal only when it has been assigned to a node and its
// phase is Succeeded or Failed; in that case both returned lists are nil.
// reuseReqs and reuseLimits are handed straight to PodRequestsAndLimitsReuse so
// the caller can avoid allocating fresh resource lists for every pod; they are
// expected to be cleared by that function before use.
func podRequestsAndLimitsByLifecycle(pod *v1.Pod, reuseReqs, reuseLimits v1.ResourceList) (reqs, limits v1.ResourceList, terminal bool) {
	// unscheduled pods cannot be terminal
	scheduled := len(pod.Spec.NodeName) != 0
	finished := pod.Status.Phase == v1.PodSucceeded || pod.Status.Phase == v1.PodFailed
	// TODO: resolve https://github.com/kubernetes/kubernetes/issues/96515 and add a condition here
	// for checking that terminal state
	if scheduled && finished {
		return nil, nil, true
	}

	reqs, limits = v1resource.PodRequestsAndLimitsReuse(pod, reuseReqs, reuseLimits)
	return reqs, limits, false
}