Skip to content

Commit

Permalink
OCPBUGS-4194: rps: use default rps mask kernel API (#650)
Browse files Browse the repository at this point in the history
* rps: machine-config: configure default rps mask

https://lore.kernel.org/netdev/cover.1676635317.git.pabeni@redhat.com/
exposes a new API to configure default rps mask for network devices.
We're adding a MachineConfig file to configure the desired rps mask

We are configuring the rps mask directly from sysctl and not
from TuneD because at the boot process
sysctl spins up earlier than the veth devices.

Signed-off-by: Talor Itzhak <titzhak@redhat.com>

* rps: tuned: configure default rps mask

Although rps is configured directly from sysctl,
we're adding it as part of tuned profile as well
to keep the profile up to date with all the system tunings.

Essentially, we want to have a single source of truth
which is the tuned profile.

Signed-off-by: Talor Itzhak <titzhak@redhat.com>

* rps: cleanup: remove oci hook

This hook point is not in use and should have been removed a long time ago

Signed-off-by: Talor Itzhak <titzhak@redhat.com>

* e2e: rps: enable and reformat tests

Enable the tests again and look for the default rps value
instead of the systemd unit.

In addition, verify that the rps mask is not applied to
physical devices.

Signed-off-by: Talor Itzhak <titzhak@redhat.com>

---------

Signed-off-by: Talor Itzhak <titzhak@redhat.com>
  • Loading branch information
Tal-or committed Jun 12, 2023
1 parent 262022d commit ef723ff
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 104 deletions.
@@ -1,5 +1,5 @@
# We should apply the rule on the virtual interfaces of the host, because of the RPS mask that would be consulted,
# is the one on the RX side of the veth in the host.
# Apply the RPS mask on the virtual interfaces of the host by default, because
# from the container perspective the RPS mask that will be consulted is the one on the RX side of the veth in the host.
# Consider the following diagram:
# Pod A <veth1 - veth2> host <veth3 - veth4> Pod B
# veth2's RPS affinity is the one determining the CPUs that are handling the packet processing when sending data from Pod A to pod B.
Expand All @@ -8,4 +8,4 @@
# The RPS affinity of the host side should be consulted (because it’s the receiver) and it should be set to cpus not sensitive to preemption (reserved pool).
# 2. Pod A = receiver, host = sender
# In case of no RPS mask on the receiver side, the sender needs to pay the price and do all the processing on its cores.
SUBSYSTEM=="net", ACTION=="add", ENV{ID_BUS}!="pci", TAG+="systemd", ENV{SYSTEMD_WANTS}="update-rps@%k.service"
net.core.rps_default_mask = {{.RPSMask}}
@@ -1 +1 @@
SUBSYSTEM=="net", ACTION=="add", ENV{DEVPATH}!="/devices/virtual/net/veth*", TAG+="systemd", ENV{SYSTEMD_WANTS}="update-rps@%k.service"
SUBSYSTEM=="net", ACTION=="add", ENV{DEVPATH}!="/devices/virtual/net/*", TAG+="systemd", ENV{SYSTEMD_WANTS}="update-rps@%k.service"
5 changes: 5 additions & 0 deletions assets/performanceprofile/tuned/openshift-node-performance
Expand Up @@ -109,6 +109,11 @@ vm.dirty_background_ratio=3
#> latency-performance
vm.swappiness=10

# also configured via a sysctl.d file
# placed here for better reflection of the change
#> rps configuration
net.core.rps_default_mask=${not_isolated_cpumask}


[selinux]
#> Custom (atomic host)
Expand Down
Expand Up @@ -43,17 +43,17 @@ const (
crioConfd = "/etc/crio/crio.conf.d"
crioRuntimesConfig = "99-runtimes.conf"

// sysctl config
defaultRPSMaskConfig = "99-default-rps-mask.conf"
sysctlConfigDir = "/etc/sysctl.d/"
sysctlTemplateRPSMask = "RPSMask"

// Workload partitioning configs
kubernetesConfDir = "/etc/kubernetes"
crioPartitioningConfig = "99-workload-pinning.conf"
ocpPartitioningConfig = "openshift-workload-pinning"

// OCIHooksConfigDir is the default directory for the OCI hooks
OCIHooksConfigDir = "/etc/containers/oci/hooks.d"
// OCIHooksConfig file contains the low latency hooks configuration
ociTemplateRPSMask = "RPSMask"
udevRulesDir = "/etc/udev/rules.d"
udevRpsRules = "99-netdev-rps.rules"
udevPhysicalRpsRules = "99-netdev-physical-rps.rules"
// scripts
hugepagesAllocation = "hugepages-allocation"
Expand Down Expand Up @@ -187,27 +187,29 @@ func getIgnitionConfig(profile *performancev2.PerformanceProfile, pinningMode *a
if profileutil.IsRpsEnabled(profile) || profile.Spec.WorkloadHints == nil ||
profile.Spec.WorkloadHints.RealTime == nil || *profile.Spec.WorkloadHints.RealTime {

// add rps udev rule
rpsRulesMode := 0644
var rpsRulesContent []byte
if profileutil.IsPhysicalRpsEnabled(profile) {
rpsRulesContent, err = assets.Configs.ReadFile(filepath.Join("configs", udevPhysicalRpsRules))
} else {
rpsRulesContent, err = assets.Configs.ReadFile(filepath.Join("configs", udevRpsRules))
}
// configure default rps mask applied to all network devices
sysctlConfContent, err := renderSysctlConf(profile, filepath.Join("configs", defaultRPSMaskConfig))
if err != nil {
return nil, err
}
rpsRulesDst := filepath.Join(udevRulesDir, udevRpsRules)
addContent(ignitionConfig, rpsRulesContent, rpsRulesDst, &rpsRulesMode)
sysctlConfFileMode := 0644
sysctlConfDst := filepath.Join(sysctlConfigDir, defaultRPSMaskConfig)
addContent(ignitionConfig, sysctlConfContent, sysctlConfDst, &sysctlConfFileMode)

// if RPS disabled for physical devices revert the default RPS mask to 0
if !profileutil.IsPhysicalRpsEnabled(profile) {
// add rps udev rule
rpsRulesMode := 0644
var rpsRulesContent []byte
rpsRulesContent, err = assets.Configs.ReadFile(filepath.Join("configs", udevPhysicalRpsRules))

if profile.Spec.CPU != nil && profile.Spec.CPU.Reserved != nil {
rpsMask, err := components.CPUListToMaskList(string(*profile.Spec.CPU.Reserved))
if err != nil {
return nil, err
}
rpsRulesDst := filepath.Join(udevRulesDir, udevPhysicalRpsRules)
addContent(ignitionConfig, rpsRulesContent, rpsRulesDst, &rpsRulesMode)

rpsService, err := getSystemdContent(getRPSUnitOptions(rpsMask))
rpsService, err := getSystemdContent(getRPSUnitOptions("0"))
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -340,30 +342,6 @@ func getSystemdContent(options []*unit.UnitOption) (string, error) {
return string(outBytes), nil
}

// GetOCIHooksConfigContent renders the OCI hook configuration template named
// configFile (looked up under "configs" in assets.Configs) and returns the
// rendered bytes.
//
// The template receives a single argument, keyed by ociTemplateRPSMask, holding
// the RPS mask computed from the profile's reserved CPUs. When the profile has
// no reserved CPUs the mask is "0" (RPS disabled).
func GetOCIHooksConfigContent(configFile string, profile *performancev2.PerformanceProfile) ([]byte, error) {
	tmpl, err := template.ParseFS(assets.Configs, filepath.Join("configs", configFile))
	if err != nil {
		return nil, err
	}

	// "0" keeps RPS disabled unless reserved CPUs are configured.
	mask := "0"
	if cpu := profile.Spec.CPU; cpu != nil && cpu.Reserved != nil {
		if mask, err = components.CPUListToMaskList(string(*cpu.Reserved)); err != nil {
			return nil, err
		}
	}

	var rendered bytes.Buffer
	if err = tmpl.Execute(&rendered, map[string]string{ociTemplateRPSMask: mask}); err != nil {
		return nil, err
	}

	return rendered.Bytes(), nil
}

// GetHugepagesSizeKilobytes returns hugepages size in kilobytes
func GetHugepagesSizeKilobytes(hugepagesSize performancev2.HugePageSize) (string, error) {
switch hugepagesSize {
Expand Down Expand Up @@ -604,3 +582,30 @@ func BootstrapWorkloadPinningMC(role string, pinningMode *apiconfigv1.CPUPartiti
mc.Spec.Config = runtime.RawExtension{Raw: rawIgnition}
return mc, nil
}

// renderSysctlConf renders the sysctl configuration template found at src in
// assets.Configs and returns the rendered bytes.
//
// The template receives a single argument, keyed by sysctlTemplateRPSMask,
// holding the RPS mask computed from the profile's reserved CPUs. When the
// profile has no reserved CPUs, it returns nil content and a nil error without
// touching the template.
func renderSysctlConf(profile *performancev2.PerformanceProfile, src string) ([]byte, error) {
	cpu := profile.Spec.CPU
	if cpu == nil || cpu.Reserved == nil {
		return nil, nil
	}

	// Compute the mask before parsing the template so that an invalid CPU list
	// is reported first.
	mask, err := components.CPUListToMaskList(string(*cpu.Reserved))
	if err != nil {
		return nil, err
	}

	tmpl, err := template.ParseFS(assets.Configs, src)
	if err != nil {
		return nil, err
	}

	var rendered bytes.Buffer
	if err := tmpl.Execute(&rendered, map[string]string{sysctlTemplateRPSMask: mask}); err != nil {
		return nil, err
	}

	return rendered.Bytes(), nil
}
74 changes: 42 additions & 32 deletions test/e2e/performanceprofile/functests/1_performance/performance.go
Expand Up @@ -295,7 +295,7 @@ var _ = Describe("[rfe_id:27368][performance]", Ordered, func() {
Skip("Test Skipped due nil Reserved cpus")
}
})
It("[test_id: 59572] Check RPS Mask is applied to atleast one single rx queue on all veth interface", func() {
It("[test_id: 59572] Check RPS Mask is applied to at least one single rx queue on all veth interface", func() {
if profile.Spec.WorkloadHints != nil && profile.Spec.WorkloadHints.RealTime != nil &&
!*profile.Spec.WorkloadHints.RealTime && !profileutil.IsRpsEnabled(profile) {
Skip("realTime Workload Hints is not enabled")
Expand All @@ -309,14 +309,15 @@ var _ = Describe("[rfe_id:27368][performance]", Ordered, func() {
allInterfaces, err := nodes.GetNodeInterfaces(node)
Expect(err).ToNot(HaveOccurred())
Expect(allInterfaces).ToNot(BeNil())
// collect all vethinterfaces in a list
// collect all veth interfaces in a list
for _, iface := range allInterfaces {
if iface.Bridge == true && iface.Physical == false {
vethInterfaces = append(vethInterfaces, iface.Name)
}
}
//iterate over all the vethinterface and
//iterate over all the veth interfaces and
//check if at least on single rx-queue has rps mask
klog.Infof("%v", vethInterfaces)
for _, vethinterface := range vethInterfaces {
devicePath := fmt.Sprintf("%s/%s", "/rootfs/sys/devices/virtual/net", vethinterface)
getRPSMaskCmd := []string{"find", devicePath, "-type", "f", "-name", "rps_cpus", "-exec", "cat", "{}", ";"}
Expand All @@ -342,42 +343,51 @@ var _ = Describe("[rfe_id:27368][performance]", Ordered, func() {
Expect(err).ToNot(HaveOccurred())
for _, node := range workerRTNodes {
// Verify the systemd RPS service uses the correct RPS mask
var maskContent string
cmd := []string{"cat", "/rootfs/etc/systemd/system/update-rps@.service"}
unitFileContents, err := nodes.ExecCommandOnNode(cmd, &node)
Expect(err).ToNot(HaveOccurred())
for _, line := range strings.Split(unitFileContents, "\n") {
if strings.Contains(line, "ExecStart=/usr/local/bin/set-rps-mask.sh") {
maskContent = line
}
cmd := []string{"sysctl", "-n", "net.core.rps_default_mask"}
rpsMaskContent, err := nodes.ExecCommandOnNode(cmd, &node)
Expect(err).ToNot(HaveOccurred(), "failed to exec command %q on node %q", cmd, node)
rpsMaskContent = strings.TrimSuffix(rpsMaskContent, "\n")
rpsCPUs, err := components.CPUMaskToCPUSet(rpsMaskContent)
Expect(err).ToNot(HaveOccurred(), "failed to parse RPS mask %q", rpsMaskContent)
Expect(rpsCPUs.Equals(expectedRPSCPUs)).To(BeTrue(), "the default rps mask is different from the reserved CPUs; have %q want %q", rpsCPUs.String(), expectedRPSCPUs.String())

By("verify RPS mask on virtual network devices")
cmd = []string{
"find", "/rootfs/sys/devices/virtual/net",
"-path", "/rootfs/sys/devices/virtual/net/lo",
"-prune", "-o",
"-type", "f",
"-name", "rps_cpus",
"-exec", "cat", "{}", ";",
}
rpsMaskContent := strings.TrimSuffix(maskContent, "\r")
Expect(len(strings.Split(rpsMaskContent, " "))).To(Equal(3), "systemd unit file rps mask value is empty")
serviceRPSCPUs := strings.Split(rpsMaskContent, " ")[2]

rpsCPUs, err := components.CPUMaskToCPUSet(serviceRPSCPUs)
Expect(err).ToNot(HaveOccurred())
Expect(rpsCPUs).To(Equal(expectedRPSCPUs), "the service rps mask is different from the reserved CPUs")

// Verify all host network devices have the correct RPS mask
if profileutil.IsRpsEnabled(profile) {
cmd = []string{"find", "/rootfs/sys/devices", "-type", "f", "-name", "rps_cpus", "-exec", "cat", "{}", ";"}
} else {
cmd = []string{"find", "/rootfs/sys/devices/virtual", "-type", "f", "-name", "rps_cpus", "-exec", "cat", "{}", ";"}
devsRPS, err := nodes.ExecCommandOnNode(cmd, &node)
Expect(err).ToNot(HaveOccurred(), "failed to exec command %q on node %q", cmd, node.Name)
for _, devRPS := range strings.Split(devsRPS, "\n") {
rpsCPUs, err = components.CPUMaskToCPUSet(devRPS)
Expect(err).ToNot(HaveOccurred())
Expect(rpsCPUs.Equals(expectedRPSCPUs)).To(BeTrue(),
"a host device rps mask is different from the reserved CPUs; have %q want %q", rpsCPUs.String(), expectedRPSCPUs.String())
}

devsRPS, err := nodes.ExecCommandOnNode(cmd, &node)
Expect(err).ToNot(HaveOccurred())
By("verify RPS mask on physical network devices")
if !profileutil.IsPhysicalRpsEnabled(profile) {
// empty cpuset
expectedRPSCPUs = cpuset.NewCPUSet([]int{}...)
}
cmd = []string{
"find", "/rootfs/sys/devices",
"-regex", "/rootfs/sys/devices/pci.*",
"-type", "f",
"-name", "rps_cpus",
"-exec", "cat", "{}", ";",
}
devsRPS, err = nodes.ExecCommandOnNode(cmd, &node)
Expect(err).ToNot(HaveOccurred(), "failed to exec command %q on node %q", cmd, node.Name)

for _, devRPS := range strings.Split(devsRPS, "\n") {
rpsCPUs, err = components.CPUMaskToCPUSet(devRPS)
Expect(err).ToNot(HaveOccurred())
if rpsCPUs.String() != string(*profile.Spec.CPU.Reserved) {
testlog.Info("Applying RPS Mask can be skipped due to race conditions")
testlog.Info("This is a known issue, Refer OCPBUGS-4194")
}
//Once the OCPBUGS-4194 is fixed Remove the If condition and uncomment the below assertion
//Expect(rpsCPUs).To(Equal(expectedRPSCPUs), "a host device rps mask is different from the reserved CPUs")
Expect(rpsCPUs.Equals(expectedRPSCPUs)).To(BeTrue(), "a host device rps mask is different from the reserved CPUs; have %q want %q", rpsCPUs.String(), expectedRPSCPUs.String())
}
}
})
Expand Down
Expand Up @@ -1926,34 +1926,29 @@ var _ = Describe("[rfe_id:28761][performance] Updating parameters in performance
for _, node := range workerRTNodes {
// Verify the systemd RPS service uses the correct RPS mask
var maskContent string
cmd := []string{"cat", "/rootfs/etc/systemd/system/update-rps@.service"}
unitFileContents, err := nodes.ExecCommandOnNode(cmd, &node)
Expect(err).ToNot(HaveOccurred())
for _, line := range strings.Split(unitFileContents, "\n") {
if strings.Contains(line, "ExecStart=/usr/local/bin/set-rps-mask.sh") {
maskContent = line
}
}
rpsMaskContent := strings.TrimSuffix(maskContent, "\r")
Expect(len(strings.Split(rpsMaskContent, " "))).To(Equal(3), "systemd unit file doesn't have proper rpsmask")
serviceRPSCPUs := strings.Split(rpsMaskContent, " ")[2]
rpsCPUs, err := components.CPUMaskToCPUSet(serviceRPSCPUs)
Expect(err).ToNot(HaveOccurred())
Expect(rpsCPUs).To(Equal(expectedRPSCPUs), "the service rps mask is different from the reserved CPUs")
cmd := []string{"sysctl", "-n", "net.core.rps_default_mask"}
maskContent, err := nodes.ExecCommandOnNode(cmd, &node)
Expect(err).ToNot(HaveOccurred(), "failed to exec command %q on node %q", cmd, node)
rpsMaskContent := strings.Trim(maskContent, "\n")
rpsCPUs, err := components.CPUMaskToCPUSet(rpsMaskContent)
Expect(err).ToNot(HaveOccurred(), "failed to parse RPS mask %q", rpsMaskContent)
Expect(rpsCPUs.Equals(expectedRPSCPUs)).To(BeTrue(), "the default rps mask is different from the reserved CPUs")

// Verify all host network devices have the correct RPS mask
cmd = []string{"find", "/rootfs/sys/devices/virtual", "-type", "f", "-name", "rps_cpus", "-exec", "cat", "{}", ";"}
cmd = []string{
"find", "/rootfs/sys/devices/virtual/net",
"-path", "/rootfs/devices/virtual/net/lo",
"-prune", "-o",
"-type", "f",
"-name", "rps_cpus",
"-exec", "cat", "{}", ";",
}
devsRPS, err := nodes.ExecCommandOnNode(cmd, &node)
Expect(err).ToNot(HaveOccurred())
Expect(err).ToNot(HaveOccurred(), "failed to exec command %q on node %q", cmd, node.Name)
for _, devRPS := range strings.Split(devsRPS, "\n") {
rpsCPUs, err = components.CPUMaskToCPUSet(devRPS)
Expect(err).ToNot(HaveOccurred())
if rpsCPUs.String() != string(*profile.Spec.CPU.Reserved) {
testlog.Info("Applying RPS Mask can be skipped due to race conditions")
testlog.Info("This is a known issue, Refer OCPBUGS-4194")
}
//Once the OCPBUGS-4194 is fixed Remove the If condition and uncomment the below assertion
//Expect(rpsCPUs).To(Equal(expectedRPSCPUs), "a host device rps mask is different from the reserved CPUs")
Expect(rpsCPUs.Equals(expectedRPSCPUs)).To(BeTrue(), "a host device rps mask is different from the reserved CPUs")
}
}
})
Expand Down

0 comments on commit ef723ff

Please sign in to comment.