Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 90 additions & 15 deletions validators/cgroup_validator_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,27 +45,72 @@ func (c *CgroupsValidator) Name() string {

const (
cgroupsConfigPrefix = "CGROUPS_"
unifiedMountpoint = "/sys/fs/cgroup"
mountsFilePath = "/proc/mounts"
)

// getUnifiedMountpoint parses the mounts file at path (in /proc/mounts format)
// and returns the mount point of a cgroup filesystem.
//
// The first cgroups v2 ("cgroup2") mount point found is returned immediately.
// If the file lists no cgroups v2 mount, the first cgroups v1 ("cgroup") mount
// point is returned instead. An error is returned when the file cannot be
// opened or fully read, or when it lists no cgroup mount at all.
func getUnifiedMountpoint(path string) (string, error) {
	f, err := os.Open(path)
	if err != nil {
		return "", err
	}
	defer f.Close()

	var cgroupV1MountPoint string
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		line := scanner.Text()
		if !strings.Contains(line, "cgroup") {
			continue
		}
		// Example fields: `cgroup2 /sys/fs/cgroup cgroup2 rw,seclabel,nosuid,nodev,noexec,relatime 0 0`.
		fields := strings.Fields(line)
		if len(fields) < 3 {
			continue
		}
		switch fields[2] {
		case "cgroup2":
			// Return the first cgroups v2 mount point directly.
			return fields[1], nil
		case "cgroup":
			// A cgroups v1 host lists one "cgroup" entry per controller
			// hierarchy (for example /sys/fs/cgroup/systemd,
			// /sys/fs/cgroup/blkio, /sys/fs/cgroup/memory, ...).
			// Only the first one is remembered; the loop continues in case
			// a cgroups v2 mount point appears later in the file.
			if cgroupV1MountPoint == "" {
				cgroupV1MountPoint = fields[1]
			}
		}
	}
	// Scan returns false on read errors and over-long lines as well as on EOF.
	// Without this check a truncated scan could hide a later cgroups v2 entry.
	if err := scanner.Err(); err != nil {
		return "", fmt.Errorf("cannot read %q: %w", path, err)
	}
	// Return cgroups v1 mount point if no cgroups v2 mount point is found.
	if cgroupV1MountPoint != "" {
		return cgroupV1MountPoint, nil
	}
	return "", fmt.Errorf("cannot get a cgroupfs mount point from %q", path)
}

// Validate is part of the system.Validator interface.
func (c *CgroupsValidator) Validate(spec SysSpec) (warns, errs []error) {
// Get the subsystems from /sys/fs/cgroup/cgroup.controllers when cgroup v2 is used.
// Get the subsystems from /sys/fs/cgroup/cgroup.controllers when cgroups v2 is used.
// /proc/cgroups is meaningless for v2
// https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features
var st unix.Statfs_t
var err error
unifiedMountpoint, err := getUnifiedMountpoint(mountsFilePath)
if err != nil {
return nil, []error{fmt.Errorf("cannot get a cgroup mount point: %w", err)}
}
if err := unix.Statfs(unifiedMountpoint, &st); err != nil {
return nil, []error{fmt.Errorf("cannot statfs the cgroupv2 root: %w", err)}
}
var requiredCgroupSpec []string
var optionalCgroupSpec []string
var subsystems []string
var warn error
if st.Type == unix.CGROUP2_SUPER_MAGIC {
subsystems, err = c.getCgroupV2Subsystems()
subsystems, err, warn = c.getCgroupV2Subsystems(unifiedMountpoint)
if err != nil {
return nil, []error{fmt.Errorf("failed to get cgroups v2 subsystems: %w", err)}
}
if warn != nil {
warns = append(warns, warn)
}
requiredCgroupSpec = spec.CgroupsV2
optionalCgroupSpec = spec.CgroupsV2Optional
} else {
Expand Down Expand Up @@ -110,11 +155,10 @@ func (c *CgroupsValidator) validateCgroupSubsystems(cgroups, subsystems []string
missing = append(missing, cgroup)
}
return missing

}

func (c *CgroupsValidator) getCgroupV1Subsystems() ([]string, error) {
// Get the subsystems from /proc/cgroups when cgroup v1 is used.
// Get the subsystems from /proc/cgroups when cgroups v1 is used.
f, err := os.Open("/proc/cgroups")
if err != nil {
return nil, err
Expand All @@ -138,19 +182,50 @@ func (c *CgroupsValidator) getCgroupV1Subsystems() ([]string, error) {
return subsystems, nil
}

func (c *CgroupsValidator) getCgroupV2Subsystems() ([]string, error) {
func (c *CgroupsValidator) getCgroupV2Subsystems(unifiedMountpoint string) ([]string, error, error) {
// Some controllers are implicitly enabled by the kernel.
// Those controllers do not appear in /sys/fs/cgroup/cgroup.controllers.
// https://github.com/torvalds/linux/blob/v5.3/kernel/cgroup/cgroup.c#L433-L434
// We assume these are always available, as it is hard to detect availability.
// So, we hardcode the following as "pseudo" controllers.
// - devices: implemented in kernel 4.15
// - freezer: implemented in kernel 5.2
pseudo := []string{"devices", "freezer"}
// For freezer, we use checkCgroupV2Freeze() to check.
// For others, we assume these are always available, as it is hard to detect availability.
// We hardcode the following as initial controllers.
// - devices: implemented in kernel 4.15.
subsystems := []string{"devices"}
freezeSupported, warn := checkCgroupV2Freeze(unifiedMountpoint)
if freezeSupported {
subsystems = append(subsystems, "freezer")
}
data, err := ioutil.ReadFile(filepath.Join(unifiedMountpoint, "cgroup.controllers"))
if err != nil {
return nil, err
return nil, err, warn
}
subsystems := append(pseudo, strings.Fields(string(data))...)
return subsystems, nil
subsystems = append(subsystems, strings.Fields(string(data))...)
return subsystems, err, warn
}

// checkCgroupV2Freeze probes whether the cgroups v2 freezer is available
// under unifiedMountpoint.
//
// It creates a temporary "freezer-test*" directory under unifiedMountpoint,
// stats the "cgroup.freeze" file inside it, and removes the directory again
// before returning.
//
// isCgroupfs is true when the stat succeeds, and also when it fails with
// anything other than a "not exist" error (the file's absence could not be
// established). It is false when the file does not exist or when the probe
// directory could not be created.
//
// warn is non-nil when the probe directory could not be created, when the stat
// returned any error (including "not exist"), or when the probe directory
// could not be removed. A removal failure is appended to an earlier warning
// rather than replacing it.
func checkCgroupV2Freeze(unifiedMountpoint string) (isCgroupfs bool, warn error) {
	const freezeFile = "cgroup.freeze"

	tmpDir, err := os.MkdirTemp(unifiedMountpoint, "freezer-test")
	if err != nil {
		return false, fmt.Errorf("could not create a temporary directory in %q: %w", unifiedMountpoint, err)
	}
	defer func() {
		rmErr := os.RemoveAll(tmpDir)
		if rmErr == nil {
			return
		}
		if warn != nil {
			// Keep the earlier warning visible instead of overwriting it.
			warn = fmt.Errorf("%v; error removing directory %q: %w", warn, tmpDir, rmErr)
			return
		}
		warn = fmt.Errorf("error removing directory %q: %w", tmpDir, rmErr)
	}()

	_, err = os.Stat(filepath.Join(tmpDir, freezeFile))
	switch {
	case err == nil:
		return true, nil
	case os.IsNotExist(err):
		// The file is absent: freezer is not supported. The stat error is
		// returned as the warning so the caller can report why.
		return false, err
	default:
		// The stat failed for a reason other than absence, so the file is
		// treated as present and the failure is reported as a warning.
		return true, fmt.Errorf("could not stat %q file in %q: %w", freezeFile, tmpDir, err)
	}
}
73 changes: 72 additions & 1 deletion validators/cgroup_validator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,14 @@ limitations under the License.
package system

import (
"os"
"testing"

"github.com/stretchr/testify/assert"
)

func TestValidateCgroupSubsystem(t *testing.T) {
// hardcoded cgroup v2 subsystems
// hardcoded cgroups v2 subsystems
pseudoSubsystems := []string{"devices", "freezer"}

v := &CgroupsValidator{
Expand Down Expand Up @@ -93,3 +94,73 @@ func TestValidateCgroupSubsystem(t *testing.T) {
})
}
}

func TestGetUnifiedMountpoint(t *testing.T) {
tests := map[string]struct {
mountsFileContent string
expectedErr bool
expectedPath string
}{
"cgroups v2": {
mountsFileContent: "cgroup2 /sys/fs/cgroup cgroup2 rw,seclabel,nosuid,nodev,noexec,relatime 0 0",
expectedErr: false,
expectedPath: "/sys/fs/cgroup",
},
"cgroups v1": {
mountsFileContent: "cgroup /sys/fs/cgroup cgroup rw,seclabel,nosuid,nodev,noexec,relatime 0 0",
expectedErr: false,
expectedPath: "/sys/fs/cgroup",
},
"empty file": {
mountsFileContent: "",
expectedErr: true,
expectedPath: "",
},
"no cgroup mounts": {
mountsFileContent: `proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0
sysfs /sys sysfs rw,seclabel,nosuid,nodev,noexec,relatime 0 0`,
expectedErr: true,
expectedPath: "",
},
"multiple cgroups v1 and v2": {
mountsFileContent: `cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory
cgroup2 /sys/fs/cgroup/unified cgroup2 rw,seclabel,nosuid,nodev,noexec,relatime`,
expectedErr: false,
expectedPath: "/sys/fs/cgroup/unified",
},
"cgroups v1 only with multiple subsystems": {
mountsFileContent: `cgroup /sys/fs/cgroup/cpuset cgroup rw,nosuid,nodev,noexec,relatime,cpuset
cgroup /sys/fs/cgroup/memory cgroup rw,nosuid,nodev,noexec,relatime,memory`,
expectedErr: false,
expectedPath: "/sys/fs/cgroup/cpuset", // First valid cgroups v1 path
},
"no valid cgroup": {
mountsFileContent: "proc /proc proc rw,nosuid,nodev,noexec,relatime 0 0\nsysfs /sys sysfs rw,seclabel,nosuid,nodev,noexec,relatime 0 0",
expectedErr: true,
expectedPath: "",
},
}

for desc, test := range tests {
t.Run(desc, func(t *testing.T) {
tmpFile, err := os.CreateTemp("", "mounts")
assert.NoError(t, err, "Unexpected error creating temp file")
defer os.Remove(tmpFile.Name())

_, err = tmpFile.Write([]byte(test.mountsFileContent))
assert.NoError(t, err, "Unexpected error writing to temp file")
tmpFile.Close()

path, err := getUnifiedMountpoint(tmpFile.Name())

if test.expectedErr {
assert.Error(t, err, "Expected error but got none")
} else {
assert.NoError(t, err, "Did not expect error but got one: %s", err)
}

assert.Equal(t, test.expectedPath, path, "Expected cgroup path mismatch")
})
}
}
9 changes: 5 additions & 4 deletions validators/types_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,12 @@ var DefaultSysSpec = SysSpec{
{Name: "IPC_NS"},
{Name: "UTS_NS"},
{Name: "CGROUPS"},
{Name: "CGROUP_CPUACCT"},
{Name: "CGROUP_BPF"}, // cgroups v2
{Name: "CGROUP_CPUACCT"}, // cgroups v1 cpuacct
{Name: "CGROUP_DEVICE"},
{Name: "CGROUP_FREEZER"},
{Name: "CGROUP_FREEZER"}, // cgroups v1 freezer
{Name: "CGROUP_PIDS"},
{Name: "CGROUP_SCHED"},
{Name: "CGROUP_SCHED"}, // cgroups v1 & v2 cpu
{Name: "CPUSETS"},
{Name: "MEMCG"},
{Name: "INET"},
Expand All @@ -71,7 +72,7 @@ var DefaultSysSpec = SysSpec{
// and therefore lacks corresponding hugetlb cgroup
"hugetlb",
// The blkio cgroup is optional since some kernels are compiled without support for block I/O throttling.
// Containerd and cri-o will use blkio to track disk I/O and throttling in both cgroup v1 and v2.
// Containerd and cri-o will use blkio to track disk I/O and throttling in both cgroups v1 and v2.
"blkio",
},
CgroupsV2: []string{"cpu", "cpuset", "devices", "freezer", "memory", "pids"},
Expand Down