diff --git a/assets/performanceprofile/tuned/openshift-node-performance b/assets/performanceprofile/tuned/openshift-node-performance
index 586914399..4f076188a 100644
--- a/assets/performanceprofile/tuned/openshift-node-performance
+++ b/assets/performanceprofile/tuned/openshift-node-performance
@@ -38,8 +38,10 @@ transparent_hugepages=never
 
 {{if not .GloballyDisableIrqLoadBalancing}}
 [irqbalance]
-#> Override the value set by cpu-partitioning with an empty one
-banned_cpus=""
+# Disable the plugin entirely, which was enabled by the parent profile `cpu-partitioning`.
+# It can be racy if TuneD restarts for whatever reason.
+#> cpu-partitioning
+enabled=false
 {{end}}
 
 [scheduler]
diff --git a/pkg/performanceprofile/controller/performanceprofile/components/tuned/tuned.go b/pkg/performanceprofile/controller/performanceprofile/components/tuned/tuned.go
index a18ace3b7..e0a839779 100644
--- a/pkg/performanceprofile/controller/performanceprofile/components/tuned/tuned.go
+++ b/pkg/performanceprofile/controller/performanceprofile/components/tuned/tuned.go
@@ -109,8 +109,7 @@ func NewNodePerformance(profile *performancev2.PerformanceProfile) (*tunedv1.Tun
 		templateArgs[templateAdditionalArgs] = strings.Join(profile.Spec.AdditionalKernelArgs, cmdlineDelimiter)
 	}
 
-	if profile.Spec.GloballyDisableIrqLoadBalancing != nil &&
-		*profile.Spec.GloballyDisableIrqLoadBalancing == true {
+	if IsIRQBalancingGloballyDisabled(profile) {
 		templateArgs[templateGloballyDisableIrqLoadBalancing] = strconv.FormatBool(true)
 	}
 
@@ -218,3 +217,7 @@ func getProfileData(tunedTemplate string, data interface{}) (string, error) {
 	}
 	return profile.String(), nil
 }
+
+func IsIRQBalancingGloballyDisabled(profile *performancev2.PerformanceProfile) bool {
+	return profile.Spec.GloballyDisableIrqLoadBalancing != nil && *profile.Spec.GloballyDisableIrqLoadBalancing
+}
diff --git a/pkg/performanceprofile/controller/performanceprofile/components/utils.go b/pkg/performanceprofile/controller/performanceprofile/components/utils.go
index fc8efd1d9..22d76e4b7 100644
--- a/pkg/performanceprofile/controller/performanceprofile/components/utils.go
+++ b/pkg/performanceprofile/controller/performanceprofile/components/utils.go
@@ -140,7 +140,7 @@ func CPUMaskToCPUSet(cpuMask string) (cpuset.CPUSet, error) {
 		}
 		mask, err := strconv.ParseUint(chunk, 16, bitsInWord)
 		if err != nil {
-			return cpuset.NewCPUSet(), fmt.Errorf("failed to parse the CPU mask %q: %v", cpuMask, err)
+			return cpuset.NewCPUSet(), fmt.Errorf("failed to parse the CPU mask %q (chunk %q): %v", cpuMask, chunk, err)
 		}
 		for j := 0; j < bitsInWord; j++ {
 			if mask&1 == 1 {
diff --git a/pkg/performanceprofile/controller/performanceprofile/components/utils_test.go b/pkg/performanceprofile/controller/performanceprofile/components/utils_test.go
index f96d49e2f..92b96425e 100644
--- a/pkg/performanceprofile/controller/performanceprofile/components/utils_test.go
+++ b/pkg/performanceprofile/controller/performanceprofile/components/utils_test.go
@@ -16,6 +16,7 @@ var cpuListToMask = []listToMask{
 	{"0", "00000001"},
 	{"2-3", "0000000c"},
 	{"3,4,53-55,61-63", "e0e00000,00000018"},
+	{"1,3-7", "000000fa"},
 	{"0-127", "ffffffff,ffffffff,ffffffff,ffffffff"},
 	{"0-255", "ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff,ffffffff"},
 }
diff --git a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go
index 695dce4e9..b6fd890db 100644
--- a/test/e2e/performanceprofile/functests/1_performance/cpu_management.go
+++ b/test/e2e/performanceprofile/functests/1_performance/cpu_management.go
@@ -781,7 +781,7 @@ func promotePodToGuaranteed(pod *corev1.Pod) *corev1.Pod {
 	return pod
 }
 
-func getTestPodWithAnnotations(annotations map[string]string, cpus int) *corev1.Pod {
+func getTestPodWithProfileAndAnnotations(perfProf *performancev2.PerformanceProfile, annotations map[string]string, cpus int) *corev1.Pod {
 	testpod := pods.GetTestPod()
 	if len(annotations) > 0 {
 		testpod.Annotations = annotations
@@ -801,8 +801,17 @@ func getTestPodWithAnnotations(annotations map[string]string, cpus int) *corev1.
 		},
 	}
 
-	runtimeClassName := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
-	testpod.Spec.RuntimeClassName = &runtimeClassName
+	if perfProf != nil {
+		runtimeClassName := components.GetComponentName(perfProf.Name, components.ComponentNamePrefix)
+		testpod.Spec.RuntimeClassName = &runtimeClassName
+	}
+
+	return testpod
+}
+
+func getTestPodWithAnnotations(annotations map[string]string, cpus int) *corev1.Pod {
+	testpod := getTestPodWithProfileAndAnnotations(profile, annotations, cpus)
+	testpod.Spec.NodeSelector = map[string]string{testutils.LabelHostname: workerRTNode.Name}
 
 	return testpod
 
diff --git a/test/e2e/performanceprofile/functests/1_performance/irqbalance.go b/test/e2e/performanceprofile/functests/1_performance/irqbalance.go
new file mode 100644
index 000000000..e54aa87cf
--- /dev/null
+++ b/test/e2e/performanceprofile/functests/1_performance/irqbalance.go
@@ -0,0 +1,247 @@
+package __performance
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"strings"
+	"time"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/kubernetes/pkg/kubelet/cm/cpuset"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+
+	machineconfigv1 "github.com/openshift/machine-config-operator/pkg/apis/machineconfiguration.openshift.io/v1"
+
+	performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
+	"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
+	"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/tuned"
+	testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
+	testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/discovery"
+	testlog "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/log"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/pods"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
+	"github.com/openshift/cluster-node-tuning-operator/test/e2e/util"
+	"github.com/openshift/cluster-node-tuning-operator/test/framework"
+)
+
+var (
+	cs = framework.NewClientSet()
+)
+
+var _ = Describe("[performance] Checking IRQBalance settings", func() {
+	var workerRTNodes []corev1.Node
+	var targetNode *corev1.Node
+	var profile *performancev2.PerformanceProfile
+	var performanceMCP string
+	var err error
+
+	BeforeEach(func() {
+		if discovery.Enabled() && testutils.ProfileNotFound {
+			Skip("Discovery mode enabled, performance profile not found")
+		}
+
+		workerRTNodes, err = nodes.GetByLabels(testutils.NodeSelectorLabels)
+		Expect(err).ToNot(HaveOccurred())
+		profile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
+		Expect(err).ToNot(HaveOccurred())
+		performanceMCP, err = mcps.GetByProfile(profile)
+		Expect(err).ToNot(HaveOccurred())
+
+		// Verify that the worker and performance MCPs have the Updated condition set to true
+		for _, mcpName := range []string{testutils.RoleWorker, performanceMCP} {
+			mcps.WaitForCondition(mcpName, machineconfigv1.MachineConfigPoolUpdated, corev1.ConditionTrue)
+		}
+
+		nodeIdx := pickNodeIdx(workerRTNodes)
+		targetNode = &workerRTNodes[nodeIdx]
+		By(fmt.Sprintf("verifying worker node %q", targetNode.Name))
+	})
+
+	Context("Verify irqbalance configuration handling", func() {
+
+		It("Should not overwrite the banned CPU set on tuned restart", func() {
+			if profile.Status.RuntimeClass == nil {
+				Skip("runtime class not generated")
+			}
+
+			if tuned.IsIRQBalancingGloballyDisabled(profile) {
+				Skip("this test needs dynamic IRQ balancing")
+			}
+
+			Expect(targetNode).ToNot(BeNil(), "missing target node")
+
+			irqAffBegin, err := getIrqDefaultSMPAffinity(targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to extract the default IRQ affinity from node %q", targetNode.Name)
+			testlog.Infof("IRQ Default affinity on %q when test begins: {%s}", targetNode.Name, irqAffBegin)
+
+			bannedCPUs, err := getIrqBalanceBannedCPUs(targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to extract the banned CPUs from node %q", targetNode.Name)
+			testlog.Infof("banned CPUs on %q when test begins: {%s}", targetNode.Name, bannedCPUs.String())
+
+			smpAffinitySet, err := nodes.GetDefaultSmpAffinitySet(targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to get default smp affinity")
+
+			onlineCPUsSet, err := nodes.GetOnlineCPUsSet(targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to get Online CPUs list")
+
+			// expect that irqbalance has not run on the system already, AKA start from pristine conditions.
+			// This is not a hard requirement, just the easier state to manage and check
+			Expect(smpAffinitySet.Equals(onlineCPUsSet)).To(BeTrue(), "found default_smp_affinity %v, expected %v - IRQBalance already run?", smpAffinitySet, onlineCPUsSet)
+
+			cpuRequest := 2 // minimum amount to be reasonably sure we're SMT-aligned
+			annotations := map[string]string{
+				"irq-load-balancing.crio.io": "disable",
+				"cpu-quota.crio.io":          "disable",
+			}
+			testpod := getTestPodWithProfileAndAnnotations(profile, annotations, cpuRequest)
+			testpod.Spec.NodeName = targetNode.Name
+
+			data, _ := json.Marshal(testpod)
+			testlog.Infof("using testpod:\n%s", string(data))
+
+			err = testclient.Client.Create(context.TODO(), testpod)
+			Expect(err).ToNot(HaveOccurred())
+			defer func() {
+				if testpod != nil {
+					testlog.Infof("deleting pod %q", testpod.Name)
+					deleteTestPod(testpod)
+				}
+				bannedCPUs, err := getIrqBalanceBannedCPUs(targetNode)
+				Expect(err).ToNot(HaveOccurred(), "failed to extract the banned CPUs from node %q", targetNode.Name)
+
+				testlog.Infof("banned CPUs on %q when test ends: {%s}", targetNode.Name, bannedCPUs.String())
+
+				irqAffBegin, err := getIrqDefaultSMPAffinity(targetNode)
+				Expect(err).ToNot(HaveOccurred(), "failed to extract the default IRQ affinity from node %q", targetNode.Name)
+
+				testlog.Infof("IRQ Default affinity on %q when test ends: {%s}", targetNode.Name, irqAffBegin)
+			}()
+
+			err = pods.WaitForCondition(testpod, corev1.PodReady, corev1.ConditionTrue, 10*time.Minute)
+			logEventsForPod(testpod)
+			Expect(err).ToNot(HaveOccurred())
+
+			// now we have something in the IRQBalance cpu list. Let's make sure the restart doesn't overwrite this data.
+			postCreateBannedCPUs, err := getIrqBalanceBannedCPUs(targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to extract the banned CPUs from node %q", targetNode.Name)
+			testlog.Infof("banned CPUs on %q just before the tuned restart: {%s}", targetNode.Name, postCreateBannedCPUs.String())
+
+			Expect(postCreateBannedCPUs.IsEmpty()).To(BeFalse(), "banned CPUs %v should not be empty on node %q", postCreateBannedCPUs, targetNode.Name)
+
+			By(fmt.Sprintf("getting a TuneD Pod running on node %s", targetNode.Name))
+			pod, err := util.GetTunedForNode(cs, targetNode)
+			Expect(err).NotTo(HaveOccurred())
+
+			By(fmt.Sprintf("causing a restart of the tuned pod (deleting the pod) on %s", targetNode.Name))
+			_, _, err = util.ExecAndLogCommand("oc", "delete", "pod", "--wait=true", "-n", pod.Namespace, pod.Name)
+			Expect(err).NotTo(HaveOccurred())
+
+			Eventually(func() error {
+				By(fmt.Sprintf("getting again a TuneD Pod running on node %s", targetNode.Name))
+				pod, err = util.GetTunedForNode(cs, targetNode)
+				if err != nil {
+					return err
+				}
+
+				By(fmt.Sprintf("waiting for the TuneD daemon running on node %s", targetNode.Name))
+				_, err = util.WaitForCmdInPod(5*time.Second, 5*time.Minute, pod, "test", "-e", "/run/tuned/tuned.pid")
+				return err
+			}).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).ShouldNot(HaveOccurred())
+
+			By(fmt.Sprintf("re-verifying worker node %q after TuneD restart", targetNode.Name))
+			postRestartBannedCPUs, err := getIrqBalanceBannedCPUs(targetNode)
+			Expect(err).ToNot(HaveOccurred(), "failed to extract the banned CPUs from node %q", targetNode.Name)
+			testlog.Infof("banned CPUs on %q after the tuned restart: {%s}", targetNode.Name, postRestartBannedCPUs.String())
+
+			Expect(postRestartBannedCPUs.ToSlice()).To(Equal(postCreateBannedCPUs.ToSlice()), "banned CPUs %v changed post tuned restart on node %q", postRestartBannedCPUs.ToSlice(), targetNode.Name)
+		})
+	})
+})
+
+// nodes.BannedCPUs fails (!!!) if the current banned list is empty because, deep down, ExecCommandOnNode expects non-empty stdout.
+// In turn, we do this to at least have a chance to detect failed commands vs failed to execute commands (we had this issue in
+// the not-so-distant past, legit command output lost somewhere in the communication). Fixing ExecCommandOnNode isn't trivial and
+// requires close attention. For the time being we reimplement a form of nodes.BannedCPUs which can handle an empty ban list.
+func getIrqBalanceBannedCPUs(node *corev1.Node) (cpuset.CPUSet, error) {
+	cmd := []string{"cat", "/rootfs/etc/sysconfig/irqbalance"}
+	conf, err := nodes.ExecCommandOnNode(cmd, node)
+	if err != nil {
+		return cpuset.NewCPUSet(), err
+	}
+
+	keyValue := findIrqBalanceBannedCPUsVarFromConf(conf)
+	if len(keyValue) == 0 {
+		// can happen: everything is commented out (the default if no tuning was ever applied)
+		testlog.Warningf("cannot find the CPU ban list in the configuration (\n%s)\n", conf)
+		return cpuset.NewCPUSet(), nil
+	}
+
+	testlog.Infof("banned CPUs setting: %q", keyValue)
+
+	items := strings.FieldsFunc(keyValue, func(c rune) bool {
+		return c == '='
+	})
+	if len(items) != 2 {
+		return cpuset.NewCPUSet(), fmt.Errorf("malformed CPU ban list in the configuration")
+	}
+
+	bannedCPUs := unquote(strings.TrimSpace(items[1]))
+	testlog.Infof("banned CPUs: %q", bannedCPUs)
+
+	banned, err := components.CPUMaskToCPUSet(bannedCPUs)
+	if err != nil {
+		return cpuset.NewCPUSet(), fmt.Errorf("failed to parse the banned CPUs: %v", err)
+	}
+
+	return banned, nil
+}
+
+func getIrqDefaultSMPAffinity(node *corev1.Node) (string, error) {
+	cmd := []string{"cat", "/rootfs/proc/irq/default_smp_affinity"}
+	return nodes.ExecCommandOnNode(cmd, node)
+}
+
+func findIrqBalanceBannedCPUsVarFromConf(conf string) string {
+	scanner := bufio.NewScanner(strings.NewReader(conf))
+	for scanner.Scan() {
+		line := strings.TrimSpace(scanner.Text())
+		if strings.HasPrefix(line, "#") {
+			continue
+		}
+		if !strings.HasPrefix(line, "IRQBALANCE_BANNED_CPUS") {
+			continue
+		}
+		return line
+	}
+	return ""
+}
+
+func pickNodeIdx(nodes []corev1.Node) int {
+	name, ok := os.LookupEnv("E2E_PAO_TARGET_NODE")
+	if !ok {
+		return 0 // "random" default
+	}
+	for idx := range nodes {
+		if nodes[idx].Name == name {
+			testlog.Infof("node %q found among candidates, picking", name)
+			return idx
+		}
+	}
+	testlog.Infof("node %q not found among candidates, fall back to random one", name)
+	return 0 // "safe" default
+}
+
+func unquote(s string) string {
+	q := "\""
+	s = strings.TrimPrefix(s, q)
+	s = strings.TrimSuffix(s, q)
+	return s
+}
diff --git a/test/e2e/performanceprofile/functests/utils/nodes/nodes.go b/test/e2e/performanceprofile/functests/utils/nodes/nodes.go
index c218b18d1..2a9546a63 100644
--- a/test/e2e/performanceprofile/functests/utils/nodes/nodes.go
+++ b/test/e2e/performanceprofile/functests/utils/nodes/nodes.go
@@ -222,18 +222,31 @@ func HasPreemptRTKernel(node *corev1.Node) error {
 }
 
 func BannedCPUs(node corev1.Node) (banned cpuset.CPUSet, err error) {
+	irqAff, err := GetDefaultSmpAffinityRaw(&node)
+	if err != nil {
+		return cpuset.NewCPUSet(), err
+	}
+	testlog.Infof("Default SMP IRQ affinity on node %q is {%s} expected mask length %d", node.Name, irqAff, len(irqAff))
+
 	cmd := []string{"sed", "-n", "s/^IRQBALANCE_BANNED_CPUS=\\(.*\\)/\\1/p", "/rootfs/etc/sysconfig/irqbalance"}
 	bannedCPUs, err := ExecCommandOnNode(cmd, &node)
 	if err != nil {
 		return cpuset.NewCPUSet(), fmt.Errorf("failed to execute %v: %v", cmd, err)
 	}
 
-	if bannedCPUs == "" {
+	testlog.Infof("Banned CPUs on node %q raw value is {%s}", node.Name, bannedCPUs)
+
+	unquotedBannedCPUs := unquote(bannedCPUs)
+
+	if unquotedBannedCPUs == "" {
 		testlog.Infof("Banned CPUs on node %q returned empty set", node.Name)
 		return cpuset.NewCPUSet(), nil // TODO: should this be an error?
 	}
 
-	banned, err = components.CPUMaskToCPUSet(bannedCPUs)
+	fixedBannedCPUs := fixMaskPadding(unquotedBannedCPUs, len(irqAff))
+	testlog.Infof("Fixed Banned CPUs on node %q {%s}", node.Name, fixedBannedCPUs)
+
+	banned, err = components.CPUMaskToCPUSet(fixedBannedCPUs)
 	if err != nil {
 		return cpuset.NewCPUSet(), fmt.Errorf("failed to parse the banned CPUs: %v", err)
 	}
@@ -241,10 +254,41 @@ func BannedCPUs(node corev1.Node) (banned cpuset.CPUSet, err error) {
 	return banned, nil
 }
 
+func unquote(s string) string {
+	q := "\""
+	s = strings.TrimPrefix(s, q)
+	s = strings.TrimSuffix(s, q)
+	return s
+}
+
+func fixMaskPadding(rawMask string, maskLen int) string {
+	maskString := strings.ReplaceAll(rawMask, ",", "")
+
+	fixedMask := fixMask(maskString, maskLen)
+	testlog.Infof("fixed mask (dealing with incorrect crio padding) on node is {%s} len=%d", fixedMask, maskLen)
+
+	retMask := fixedMask[0:8]
+	for i := 8; i+8 <= len(maskString); i += 8 {
+		retMask = retMask + "," + maskString[i:i+8]
+	}
+	return retMask
+}
+
+func fixMask(maskString string, maskLen int) string {
+	if maskLen >= len(maskString) {
+		return maskString
+	}
+	return strings.Repeat("0", len(maskString)-maskLen) + maskString[len(maskString)-maskLen:]
+}
+
+func GetDefaultSmpAffinityRaw(node *corev1.Node) (string, error) {
+	cmd := []string{"cat", "/proc/irq/default_smp_affinity"}
+	return ExecCommandOnNode(cmd, node)
+}
+
 // GetDefaultSmpAffinitySet returns the default smp affinity mask for the node
 func GetDefaultSmpAffinitySet(node *corev1.Node) (cpuset.CPUSet, error) {
-	command := []string{"cat", "/proc/irq/default_smp_affinity"}
-	defaultSmpAffinity, err := ExecCommandOnNode(command, node)
+	defaultSmpAffinity, err := GetDefaultSmpAffinityRaw(node)
 	if err != nil {
 		return cpuset.NewCPUSet(), err
 	}
diff --git a/test/e2e/performanceprofile/testdata/render-expected-output/manual_tuned.yaml b/test/e2e/performanceprofile/testdata/render-expected-output/manual_tuned.yaml
index ae97110d8..eb7a4cca9 100644
--- a/test/e2e/performanceprofile/testdata/render-expected-output/manual_tuned.yaml
+++ b/test/e2e/performanceprofile/testdata/render-expected-output/manual_tuned.yaml
@@ -22,8 +22,9 @@ spec:
       Different values will override the original values in parent profiles.\n\n[variables]\n#>
      isolated_cores take a list of ranges; e.g. isolated_cores=2,4-7\n\nisolated_cores=1\n\n\nnot_isolated_cores_expanded=${f:cpulist_invert:${isolated_cores_expanded}}\n\n[cpu]\n#>
      latency-performance\n#> (override)\nforce_latency=cstate.id:1|3\ngovernor=performance\nenergy_perf_bias=performance\nmin_perf_pct=100\n\n\n[service]\nservice.stalld=start,enable\n\n\n[vm]\n#>
-      network-latency\ntransparent_hugepages=never\n\n\n[irqbalance]\n#> Override
-      the value set by cpu-partitioning with an empty one\nbanned_cpus=\"\"\n\n\n[scheduler]\nruntime=0\ngroup.ksoftirqd=0:f:11:*:ksoftirqd.*\ngroup.rcuc=0:f:11:*:rcuc.*\nsched_min_granularity_ns=10000000\nsched_migration_cost_ns=5000000\nnuma_balancing=0\n\nsched_rt_runtime_us=-1\n\n\ndefault_irq_smp_affinity
+      network-latency\ntransparent_hugepages=never\n\n\n[irqbalance]\n# Disable the
+      plugin entirely, which was enabled by the parent profile `cpu-partitioning`.\n#
+      It can be racy if TuneD restarts for whatever reason.\n#> cpu-partitioning\nenabled=false\n\n\n[scheduler]\nruntime=0\ngroup.ksoftirqd=0:f:11:*:ksoftirqd.*\ngroup.rcuc=0:f:11:*:rcuc.*\nsched_min_granularity_ns=10000000\nsched_migration_cost_ns=5000000\nnuma_balancing=0\n\nsched_rt_runtime_us=-1\n\n\ndefault_irq_smp_affinity
       = ignore\n\n\n[sysctl]\n\n#> cpu-partitioning #realtime\nkernel.hung_task_timeout_secs=600\n#>
       cpu-partitioning #realtime\nkernel.nmi_watchdog=0\n#> realtime\nkernel.sched_rt_runtime_us=-1\n#>
      cpu-partitioning #realtime\nvm.stat_interval=10\n\n# cpu-partitioning and realtime