forked from kubernetes-sigs/scheduler-plugins
-
Notifications
You must be signed in to change notification settings - Fork 0
/
filter.go
157 lines (136 loc) · 6.23 KB
/
filter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package noderesourcetopology
import (
"context"
"fmt"
"strings"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/klog/v2"
v1qos "k8s.io/kubernetes/pkg/apis/core/v1/helper/qos"
bm "k8s.io/kubernetes/pkg/kubelet/cm/topologymanager/bitmask"
"k8s.io/kubernetes/pkg/scheduler/framework"
topologyv1alpha1 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha1"
)
// PolicyHandler is a function type that takes a pod and a node's zone list
// and returns a *framework.Status.
// NOTE(review): the handlers visible in this file additionally take a
// *framework.NodeInfo, and Filter dispatches through a `filter` field of the
// policyHandlers map values, so this type does not appear to be used by the
// code shown here - confirm whether it is still needed.
type PolicyHandler func(pod *v1.Pod, zoneMap topologyv1alpha1.ZoneList) *framework.Status
// singleNUMAContainerLevelHandler checks, container by container, whether the
// resource requests of every container in pod can be aligned on the NUMA
// zones reported for the node.
//
// Init containers and regular containers are evaluated in one scope, each
// against the same NUMA node list built from zones and the pod's QoS class.
//
// It returns nil when every container can be aligned, or an Unschedulable
// status naming the first container that cannot.
func singleNUMAContainerLevelHandler(pod *v1.Pod, zones topologyv1alpha1.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status {
	klog.V(5).InfoS("Single NUMA node handler")

	// prepare NUMANodes list from zoneMap
	nodes := createNUMANodeList(zones)
	qos := v1qos.GetPodQOS(pod)

	// Walk the two container slices separately instead of append()-ing them:
	// append on pod.Spec.InitContainers may write into that slice's backing
	// array when it has spare capacity, and the pod object is shared.
	for _, containers := range [][]v1.Container{pod.Spec.InitContainers, pod.Spec.Containers} {
		for i := range containers {
			container := &containers[i]
			// resMatchNUMANodes returns true when NO NUMA node fits the requests.
			if resMatchNUMANodes(nodes, container.Resources.Requests, qos, nodeInfo) {
				// definitely we can't align container, so we can't align a pod
				return framework.NewStatus(framework.Unschedulable, fmt.Sprintf("cannot align container: %s", container.Name))
			}
		}
	}
	return nil
}
// resMatchNUMANodes reports whether the requested resources can NOT be aligned
// on a single NUMA node. Despite its name it returns true when no NUMA node
// is able to satisfy every requested resource, and false when at least one
// NUMA node can (or when resources is empty).
//
// This function requires a NUMANodeList with properly populated NUMANode
// entries; NUMAID should be in range 0-63.
//
// For each requested resource a per-resource bitmask is built with one bit per
// suitable NUMA node, and ANDed into the aggregated bitmask. A NUMA node is
// considered suitable for a resource when any of the following holds:
//  1. the resource is not reported for that NUMA node, but either the
//     requested quantity is zero or resourceFoundOnNode reports it as
//     available at node level (resources that are not NUMA aligned or not
//     supported by the topology exporter);
//  2. the resource is memory or hugepages;
//  3. the resource is cpu and the pod is not Guaranteed QoS;
//  4. the requested quantity is zero;
//  5. the NUMA node reports at least the requested quantity.
func resMatchNUMANodes(numaNodes NUMANodeList, resources v1.ResourceList, qos v1.PodQOSClass, nodeInfo *framework.NodeInfo) bool {
	// set all bits, each bit is a NUMA node, if resources couldn't be aligned
	// on the NUMA node, bit should be unset
	bitmask := bm.NewEmptyBitMask()
	bitmask.Fill()

	// The loop variable is deliberately not called "resource" so that it does
	// not shadow the imported resource package.
	for resName, quantity := range resources {
		// bits which are not in the NUMA id's range stay unset
		resourceBitmask := bm.NewEmptyBitMask()
		for _, numaNode := range numaNodes {
			numaQuantity, ok := numaNode.Resources[resName]
			if !ok {
				// The resource is not reported for this NUMA node. It can still
				// be considered if nothing is actually requested, or if the
				// resource is available at node level. Comparing against the
				// zero-valued numaQuantity here would wrongly reject it.
				if quantity.IsZero() || resourceFoundOnNode(resName, quantity, nodeInfo) {
					resourceBitmask.Add(numaNode.NUMAID)
				}
				continue
			}

			if resName == v1.ResourceMemory ||
				strings.HasPrefix(string(resName), v1.ResourceHugePagesPrefix) ||
				(resName == v1.ResourceCPU && qos != v1.PodQOSGuaranteed) ||
				quantity.IsZero() ||
				numaQuantity.Cmp(quantity) >= 0 {
				// possible to align resources on NUMA node
				resourceBitmask.Add(numaNode.NUMAID)
			}
		}

		bitmask.And(resourceBitmask)
		if bitmask.IsEmpty() {
			// no NUMA node can host everything seen so far; no need to go on
			return true
		}
	}
	return bitmask.IsEmpty()
}
// singleNUMAPodLevelHandler checks whether the pod as a whole can be aligned
// on the NUMA zones reported for the node.
//
// The requests of all init containers and regular containers are summed per
// resource name into one resource list, which is then checked against the
// NUMA node list built from zones.
//
// It returns nil when the summed requests can be aligned, or an Unschedulable
// status naming the pod otherwise.
func singleNUMAPodLevelHandler(pod *v1.Pod, zones topologyv1alpha1.ZoneList, nodeInfo *framework.NodeInfo) *framework.Status {
	klog.V(5).InfoS("Pod Level Resource handler")

	resources := make(v1.ResourceList)
	// Walk the two container slices separately instead of append()-ing them:
	// append on pod.Spec.InitContainers may write into that slice's backing
	// array when it has spare capacity, and the pod object is shared.
	for _, containers := range [][]v1.Container{pod.Spec.InitContainers, pod.Spec.Containers} {
		for i := range containers {
			for resName, quantity := range containers[i].Resources.Requests {
				// Quantity may hold a pointer to its decimal representation;
				// DeepCopy guarantees that Add below never modifies the value
				// stored in the pod spec.
				total := quantity.DeepCopy()
				if prev, ok := resources[resName]; ok {
					total.Add(prev)
				}
				resources[resName] = total
			}
		}
	}

	// resMatchNUMANodes returns true when NO NUMA node fits the requests.
	if resMatchNUMANodes(createNUMANodeList(zones), resources, v1qos.GetPodQOS(pod), nodeInfo) {
		// definitely we can't align container, so we can't align a pod
		return framework.NewStatus(framework.Unschedulable, fmt.Sprintf("cannot align pod: %s", pod.Name))
	}
	return nil
}
// Filter decides whether pod may be placed on the node described by nodeInfo,
// based on the node's NodeResourceTopology. Currently only single-numa-node
// is supported.
//
// It returns:
//   - an Error status when nodeInfo carries no node;
//   - nil (no objection) for BestEffort pods, for nodes without a
//     NodeResourceTopology, and when no registered policy handler rejects
//     the pod;
//   - the first non-nil status returned by a policy handler otherwise.
//
// Policies listed by the node that have no registered handler are logged at
// verbosity 5 and skipped.
func (tm *TopologyMatch) Filter(ctx context.Context, cycleState *framework.CycleState, pod *v1.Pod, nodeInfo *framework.NodeInfo) *framework.Status {
	node := nodeInfo.Node()
	if node == nil {
		return framework.NewStatus(framework.Error, "node not found")
	}
	if v1qos.GetPodQOS(pod) == v1.PodQOSBestEffort {
		return nil
	}

	nodeTopology := findNodeTopology(node.Name, &tm.nodeResTopologyPlugin)
	if nodeTopology == nil {
		return nil
	}

	// InfoS expects key/value pairs after the message.
	klog.V(5).InfoS("Found NodeResourceTopology", "nodeTopology", klog.KObj(nodeTopology))
	for _, policyName := range nodeTopology.TopologyPolicies {
		handler, ok := tm.policyHandlers[topologyv1alpha1.TopologyManagerPolicy(policyName)]
		if !ok {
			// Structured logging: Infof with extra arguments and no format
			// verbs would print "%!(EXTRA ...)" garbage.
			klog.V(5).InfoS("Policy handler not found", "policy", policyName)
			continue
		}
		if status := handler.filter(pod, nodeTopology.Zones, nodeInfo); status != nil {
			return status
		}
	}
	return nil
}
// resourceFoundOnNode reports whether the node's allocatable resources contain
// resName with a quantity of at least wantQuantity. It returns false when the
// resource is not listed among the node's allocatable resources at all.
func resourceFoundOnNode(resName v1.ResourceName, wantQuantity resource.Quantity, nodeInfo *framework.NodeInfo) bool {
	allocatable, found := nodeInfo.Allocatable.ResourceList()[resName]
	if !found {
		return false
	}
	return allocatable.Cmp(wantQuantity) >= 0
}