Skip to content

Commit

Permalink
nrt: overreserve: detect and honor pfp compute method from NRT
Browse files Browse the repository at this point in the history
When computing the expected node pod fingerprint (PFP), we
now look for the attribute describing the computation method
provided by the topology-updater agent. If not provided, we
fall back to the previous backward-compatible method, which
is consider all pods.

If the method is supplied, the scheduler plugin now computes
the PFP in the same way advertised as the agent.

Signed-off-by: Francesco Romani <fromani@redhat.com>
  • Loading branch information
ffromani committed May 23, 2023
1 parent 23d663c commit 2a01ad5
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 22 deletions.
6 changes: 3 additions & 3 deletions pkg/noderesourcetopology/cache/overreserve.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,15 +220,15 @@ func (ov *OverReserve) Resync() {
continue
}

pfpExpected := podFingerprintForNodeTopology(nrtCandidate)
pfpExpected, onlyExclRes := podFingerprintForNodeTopology(nrtCandidate)
if pfpExpected == "" {
klog.V(3).InfoS("nrtcache: missing NodeTopology podset fingerprint data", "logID", logID, "node", nodeName)
continue
}

klog.V(6).InfoS("nrtcache: trying to resync NodeTopology", "logID", logID, "node", nodeName, "fingerprint", pfpExpected)
klog.V(6).InfoS("nrtcache: trying to resync NodeTopology", "logID", logID, "node", nodeName, "fingerprint", pfpExpected, "onlyExclusiveResources", onlyExclRes)

err = checkPodFingerprintForNode(logID, objs, nodeName, pfpExpected)
err = checkPodFingerprintForNode(logID, objs, nodeName, pfpExpected, onlyExclRes)
if errors.Is(err, podfingerprint.ErrSignatureMismatch) {
// can happen, not critical
klog.V(5).InfoS("nrtcache: NodeTopology podset fingerprint mismatch", "logID", logID, "node", nodeName)
Expand Down
28 changes: 18 additions & 10 deletions pkg/noderesourcetopology/cache/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
topologyv1alpha2 "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2"
topologyv1alpha2attr "github.com/k8stopologyawareschedwg/noderesourcetopology-api/pkg/apis/topology/v1alpha2/helper/attribute"

"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/resourcerequests"
"sigs.k8s.io/scheduler-plugins/pkg/noderesourcetopology/stringify"
"sigs.k8s.io/scheduler-plugins/pkg/util"

Expand Down Expand Up @@ -197,33 +198,39 @@ func (cnt counter) Len() int {

// podFingerprintForNodeTopology extracts without recomputing the pods fingerprint from
// the provided Node Resource Topology object.
func podFingerprintForNodeTopology(nrt *topologyv1alpha2.NodeResourceTopology) string {
func podFingerprintForNodeTopology(nrt *topologyv1alpha2.NodeResourceTopology) (string, bool) {
if attr, ok := topologyv1alpha2attr.Get(nrt.Attributes, podfingerprint.Attribute); ok {
return attr.Value
attrMethod, ok := topologyv1alpha2attr.Get(nrt.Attributes, podfingerprint.AttributeMethod)
onlyExclusiveResources := ok && (attrMethod.Value == podfingerprint.MethodWithExclusiveResources)
return attr.Value, onlyExclusiveResources
}
if nrt.Annotations != nil {
return nrt.Annotations[podfingerprint.Annotation]
return nrt.Annotations[podfingerprint.Annotation], false
}
return ""
return "", false
}

type podInfo struct {
Namespace string
Name string
Namespace string
Name string
HasExclusiveResources bool
}

// checkPodFingerprintForNode verifies if the given pods fingeprint (usually from NRT update) matches the
// computed one using the stored data about pods running on nodes. Returns nil on success, or an error
// describing the failure
func checkPodFingerprintForNode(logID string, objs []podInfo, nodeName, pfpExpected string) error {
func checkPodFingerprintForNode(logID string, objs []podInfo, nodeName, pfpExpected string, onlyExclusiveResources bool) error {
st := podfingerprint.MakeStatus(nodeName)
pfp := podfingerprint.NewTracingFingerprint(len(objs), &st)
for _, obj := range objs {
if onlyExclusiveResources && !obj.HasExclusiveResources {
continue
}
pfp.Add(obj.Namespace, obj.Name)
}
pfpComputed := pfp.Sign()

klog.V(5).InfoS("nrtcache: podset fingerprint check", "logID", logID, "node", nodeName, "expected", pfpExpected, "computed", pfpComputed)
klog.V(5).InfoS("nrtcache: podset fingerprint check", "logID", logID, "node", nodeName, "expected", pfpExpected, "computed", pfpComputed, "onlyExclusiveResources", onlyExclusiveResources)
klog.V(6).InfoS("nrtcache: podset fingerprint debug", "logID", logID, "node", nodeName, "status", st.Repr())

err := pfp.Check(pfpExpected)
Expand All @@ -244,8 +251,9 @@ func makeNodeToPodInfosMap(podLister podlisterv1.PodLister, logID string) (map[s
}
nodeObjs := nodeToObjsMap[pod.Spec.NodeName]
nodeObjs = append(nodeObjs, podInfo{
Namespace: pod.Namespace,
Name: pod.Name,
Namespace: pod.Namespace,
Name: pod.Name,
HasExclusiveResources: resourcerequests.AreExclusiveForPod(pod),
})
nodeToObjsMap[pod.Spec.NodeName] = nodeObjs
}
Expand Down
94 changes: 85 additions & 9 deletions pkg/noderesourcetopology/cache/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,34 +45,109 @@ func TestFingerprintFromNRT(t *testing.T) {
}

var pfp string
pfp = podFingerprintForNodeTopology(nrt)
var exOnly bool
pfp, exOnly = podFingerprintForNodeTopology(nrt)
if pfp != "" {
t.Errorf("misdetected fingerprint from missing annotations")
}
if exOnly {
t.Errorf("misdetected fingerprint compute method from missing annotations")
}

nrt.Annotations = map[string]string{}
pfp = podFingerprintForNodeTopology(nrt)
pfp, exOnly = podFingerprintForNodeTopology(nrt)
if pfp != "" {
t.Errorf("misdetected fingerprint from empty annotations")
}
if exOnly {
t.Errorf("misdetected fingerprint compute method from empty annotations")
}

pfpTestAnn := "test-ann"
nrt.Annotations[podfingerprint.Annotation] = pfpTestAnn
pfp = podFingerprintForNodeTopology(nrt)
pfp, exOnly = podFingerprintForNodeTopology(nrt)
if pfp != pfpTestAnn {
t.Errorf("misdetected fingerprint as %q expected %q", pfp, pfpTestAnn)
}
if exOnly {
t.Errorf("misdetected fingerprint compute method from empty method annotation")
}

// test attribute overrides annotation
pfpTestAttr := "test-attr"
nrt.Attributes = append(nrt.Attributes, topologyv1alpha2.AttributeInfo{
Name: podfingerprint.Attribute,
Value: pfpTestAttr,
})
pfp = podFingerprintForNodeTopology(nrt)
pfp, exOnly = podFingerprintForNodeTopology(nrt)
if pfp != pfpTestAttr {
t.Errorf("misdetected fingerprint as %q expected %q", pfp, pfpTestAttr)
}
if exOnly {
t.Errorf("misdetected fingerprint compute method from empty method attribute")
}
}

func TestFingerprintMethodFromNRT(t *testing.T) {
nrt := &topologyv1alpha2.NodeResourceTopology{
ObjectMeta: metav1.ObjectMeta{
Name: "node-0",
},
TopologyPolicies: []string{
"best-effort",
},
}

// test attribute overrides annotation
pfpTestAttr := "test-attr"
nrt.Attributes = append(nrt.Attributes, topologyv1alpha2.AttributeInfo{
Name: podfingerprint.Attribute,
Value: pfpTestAttr,
})

tcases := []struct {
description string
methodValue string
expected bool
}{
{
description: "no attr",
methodValue: "",
expected: false,
},
{
description: "unrecognized attr",
methodValue: "foobar",
expected: false,
},
{
description: "all (old default)",
methodValue: podfingerprint.MethodAll,
expected: false,
},
{
description: "exclusive only",
methodValue: podfingerprint.MethodWithExclusiveResources,
expected: true,
},
}

for _, tcase := range tcases {
t.Run(tcase.description, func(t *testing.T) {
nrtObj := nrt.DeepCopy()
if tcase.methodValue != "" {
}
nrtObj.Attributes = append(nrt.Attributes, topologyv1alpha2.AttributeInfo{
Name: podfingerprint.AttributeMethod,
Value: tcase.methodValue,
})

_, exOnly := podFingerprintForNodeTopology(nrtObj)
if exOnly != tcase.expected {
t.Errorf("misdetected method: expected %v (from %q) got %v", tcase.expected, tcase.methodValue, exOnly)
}
})
}
}

func TestNRTStoreGet(t *testing.T) {
Expand Down Expand Up @@ -586,10 +661,11 @@ func TestMakeNodeToPodInfosMap(t *testing.T) {

func TestCheckPodFingerprintForNode(t *testing.T) {
tcases := []struct {
description string
objs []podInfo
pfp string
expectedErr error
description string
objs []podInfo
exclusiveOnly bool
pfp string
expectedErr error
}{
{
description: "nil objs",
Expand All @@ -599,7 +675,7 @@ func TestCheckPodFingerprintForNode(t *testing.T) {

for _, tcase := range tcases {
t.Run(tcase.description, func(t *testing.T) {
gotErr := checkPodFingerprintForNode("testing", tcase.objs, "test-node", tcase.pfp)
gotErr := checkPodFingerprintForNode("testing", tcase.objs, "test-node", tcase.pfp, tcase.exclusiveOnly)
if !errors.Is(gotErr, tcase.expectedErr) {
t.Errorf("got error %v expected %v", gotErr, tcase.expectedErr)
}
Expand Down

0 comments on commit 2a01ad5

Please sign in to comment.