Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
onlineCPUSet, err = nodes.GetOnlineCPUsSet(ctx, workerRTNode)
Expect(err).ToNot(HaveOccurred())
cpuID := onlineCPUSet.UnsortedList()[0]
smtLevel = nodes.GetSMTLevel(ctx, cpuID, workerRTNode)
smtLevel, err = nodes.GetSMTLevel(ctx, cpuID, workerRTNode)
Expect(err).ToNot(HaveOccurred(), "Unable to fetch SMT level on node %s, Error: %v", workerRTNode.Name, err)
getter, err = cgroup.BuildGetter(ctx, testclient.DataPlaneClient, testclient.K8sClient)
Expect(err).ToNot(HaveOccurred())
cgroupV2, err = cgroup.IsVersion2(ctx, testclient.DataPlaneClient)
Expand Down Expand Up @@ -251,7 +252,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {

DescribeTable("Verify CPU usage by stress PODs", func(ctx context.Context, guaranteed bool) {
cpuID := onlineCPUSet.UnsortedList()[0]
smtLevel := nodes.GetSMTLevel(ctx, cpuID, workerRTNode)
smtLevel, err := nodes.GetSMTLevel(ctx, cpuID, workerRTNode)
Expect(err).ToNot(HaveOccurred(), "Unable to fetch SMT level on node %s, Error: %v", workerRTNode.Name, err)
if smtLevel < 2 {
Skip(fmt.Sprintf("designated worker node %q has SMT level %d - minimum required 2", workerRTNode.Name, smtLevel))
}
Expand Down Expand Up @@ -281,7 +283,6 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
}

By(fmt.Sprintf("create a %s QoS stress pod requesting %d cpus", expectedQos, cpuRequest))
var err error
err = testclient.DataPlaneClient.Create(ctx, testpod)
Expect(err).ToNot(HaveOccurred())

Expand Down Expand Up @@ -414,7 +415,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
}

cpuID := onlineCPUSet.UnsortedList()[0]
smtLevel = nodes.GetSMTLevel(context.TODO(), cpuID, workerRTNode)
smtLevel, err = nodes.GetSMTLevel(context.TODO(), cpuID, workerRTNode)
Expect(err).ToNot(HaveOccurred(), "Unable to fetch SMT level on node %s, Error: %v", workerRTNode.Name, err)
})

AfterEach(func() {
Expand Down Expand Up @@ -584,7 +586,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
// also covers Hyper-thread aware sheduling [test_id:46545] Odd number of isolated CPU threads
// any random existing cpu is fine
cpuID := onlineCPUSet.UnsortedList()[0]
smtLevel := nodes.GetSMTLevel(context.TODO(), cpuID, workerRTNode)
smtLevel, err := nodes.GetSMTLevel(context.TODO(), cpuID, workerRTNode)
Expect(err).ToNot(HaveOccurred(), "Unable to fetch SMT level on node %s, Error: %v", workerRTNode.Name, err)
if smtLevel < 2 {
Skip(fmt.Sprintf("designated worker node %q has SMT level %d - minimum required 2", workerRTNode.Name, smtLevel))
}
Expand All @@ -593,7 +596,7 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
testpod = promotePodToGuaranteed(getStressPod(workerRTNode.Name, cpuCount))
testpod.Namespace = testutils.NamespaceTesting

err := testclient.DataPlaneClient.Create(context.TODO(), testpod)
err = testclient.DataPlaneClient.Create(context.TODO(), testpod)
Expect(err).ToNot(HaveOccurred())

currentPod, err := pods.WaitForPredicate(context.TODO(), client.ObjectKeyFromObject(testpod), 10*time.Minute, func(pod *corev1.Pod) (bool, error) {
Expand Down Expand Up @@ -643,7 +646,8 @@ var _ = Describe("[rfe_id:27363][performance] CPU Management", Ordered, func() {
// Check for SMT enabled
// any random existing cpu is fine
cpuID := onlineCPUSet.UnsortedList()[0]
smtLevel := nodes.GetSMTLevel(ctx, cpuID, workerRTNode)
smtLevel, err := nodes.GetSMTLevel(ctx, cpuID, workerRTNode)
Expect(err).ToNot(HaveOccurred(), "Unable to fetch SMT level on node %s, Error: %v", workerRTNode.Name, err)
hasWP := checkForWorkloadPartitioning(ctx)

// Following checks are required to map test_id scenario correctly to the type of node under test
Expand Down
12 changes: 8 additions & 4 deletions test/e2e/performanceprofile/functests/utils/nodes/nodes.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,16 +262,20 @@ func GetOnlineCPUsSet(ctx context.Context, node *corev1.Node) (cpuset.CPUSet, er

// GetSMTLevel returns the SMT level on the node using the given cpuID as target
// Use a random cpuID from the return value of GetOnlineCPUsSet if not sure
func GetSMTLevel(ctx context.Context, cpuID int, node *corev1.Node) int {
func GetSMTLevel(ctx context.Context, cpuID int, node *corev1.Node) (int, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

GinkgoHelper() in the first line will solve exactly that without having to change the function API
https://onsi.github.io/ginkgo/#mental-model-how-ginkgo-handles-failure

Copy link
Contributor

@ffromani ffromani Sep 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Tal-or is very correct. In addition, that would allow us to remove the fragile WithOffset expectations (mostly my fault here).

That said, changing the function signature and return an explicit error LGTM and I slightly prefer that approach for library code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Tal-or If i understand correctly, if we used GinkgoHelper(), it would remove the panic and point the failure to Test itself, which originally is not the motive of the API change.

Considering a library function, it should work when called outside a test spec block, where it would panic. Please let me know if I’ve misunderstood anything here.

Copy link
Contributor

@Tal-or Tal-or Sep 29, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is considered to be panic? a go assertion? because it's expected to abort a test with an assertion in Ginkgo.
All GinkgoHelper() is doing is point to the caller of the function (in the stack trace), it's basically equal to call Fail with offset.

Considering a library function, it should work when called outside a test spec block

this function scope is only for the NTO e2e test, it should not be imported into other projects and not even in the none-test code of this specific project, so assertion here is completely fine.

Bottom line, I don't mind whether this function return an errors or asserts, my initial comment was mainly for the sake of discussion and broadening the knowledge and expertise as part of the reviewing process.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this function scope is only for the NTO e2e test, it should not be imported into other projects and not even in the none-test code of this specific project, so assertion here is completely fine.

I fully agree with this sentence

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess the remaining open point boils down to: where is this function planned to be used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@ffromani Here is a use- case where this function was tend to be used - Link. Ignore the checkHyperthreding() for now as it is just a wrapper over getSMTLevel and would be removed once it itself gets fixed.

cmd := []string{"/bin/sh", "-c", fmt.Sprintf("cat /sys/devices/system/cpu/cpu%d/topology/thread_siblings_list | tr -d \"\n\r\"", cpuID)}
out, err := ExecCommand(ctx, node, cmd)
ExpectWithOffset(1, err).ToNot(HaveOccurred())
if err != nil {
return 0, err
}
threadSiblingsList := testutils.ToString(out)
// how many thread sibling you have = SMT level
// example: 2-way SMT means 2 threads sibling for each thread
cpus, err := cpuset.Parse(strings.TrimSpace(threadSiblingsList))
ExpectWithOffset(1, err).ToNot(HaveOccurred())
return cpus.Size()
if err != nil {
return 0, err
}
return cpus.Size(), nil
}

// GetNumaNodes returns the number of numa nodes and the associated cpus as list on the node
Expand Down