Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kubelet: fix e2e-node cgroups test on cgroup v2 #89897

Merged
merged 4 commits into from Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions pkg/kubelet/cm/BUILD
Expand Up @@ -70,6 +70,7 @@ go_library(
"//vendor/github.com/docker/go-units:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/k8s.io/utils/io:go_default_library",
Expand Down Expand Up @@ -121,6 +122,7 @@ go_library(
"//vendor/github.com/docker/go-units:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/k8s.io/utils/io:go_default_library",
Expand Down
173 changes: 162 additions & 11 deletions pkg/kubelet/cm/cgroup_manager_linux.go
Expand Up @@ -18,15 +18,18 @@ package cm

import (
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
"strconv"
"strings"
"sync"
"time"

libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
cgroupfs "github.com/opencontainers/runc/libcontainer/cgroups/fs"
cgroupfs2 "github.com/opencontainers/runc/libcontainer/cgroups/fs2"
cgroupsystemd "github.com/opencontainers/runc/libcontainer/cgroups/systemd"
libcontainerconfigs "github.com/opencontainers/runc/libcontainer/configs"
"k8s.io/klog"
Expand All @@ -36,6 +39,7 @@ import (
"k8s.io/apimachinery/pkg/util/sets"
utilfeature "k8s.io/apiserver/pkg/util/feature"
kubefeatures "k8s.io/kubernetes/pkg/features"
cmutil "k8s.io/kubernetes/pkg/kubelet/cm/util"
"k8s.io/kubernetes/pkg/kubelet/metrics"
)

Expand Down Expand Up @@ -228,6 +232,12 @@ func (m *cgroupManagerImpl) buildCgroupPaths(name CgroupName) map[string]string
return cgroupPaths
}

// buildCgroupUnifiedPath builds the absolute path for the given cgroup name
// under the cgroup v2 unified hierarchy root.
func (m *cgroupManagerImpl) buildCgroupUnifiedPath(name CgroupName) string {
	return path.Join(cmutil.CgroupRoot, m.Name(name))
}

// TODO(filbranden): This logic belongs in libcontainer/cgroup/systemd instead.
// It should take a libcontainerconfigs.Cgroup.Path field (rather than Name and Parent)
// and split it appropriately, using essentially the logic below.
Expand All @@ -246,6 +256,21 @@ func updateSystemdCgroupInfo(cgroupConfig *libcontainerconfigs.Cgroup, cgroupNam

// Exists checks if all subsystem cgroups already exist
func (m *cgroupManagerImpl) Exists(name CgroupName) bool {
if libcontainercgroups.IsCgroup2UnifiedMode() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this eliminates verification on particular controllers being present and available on a cgroupv2 host.

was that intentional?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it was intentional as on cgroup v2 the cgroup's existence depends only on the cgroup directory being created. Should I change the test to also verify if the controllers we need are enabled?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed the implementation to also check if the controllers we need are enabled.

cgroupPath := m.buildCgroupUnifiedPath(name)
neededControllers := getSupportedUnifiedControllers()
enabledControllers, err := readUnifiedControllers(cgroupPath)
if err != nil {
return false
}
difference := neededControllers.Difference(enabledControllers)
if difference.Len() > 0 {
klog.V(4).Infof("The Cgroup %v has some missing controllers: %v", name, difference)
return false
}
return true
}

// Get map of all cgroup paths on the system for the particular cgroup
cgroupPaths := m.buildCgroupPaths(name)

Expand Down Expand Up @@ -338,7 +363,7 @@ func getSupportedSubsystems() map[subsystem]bool {
return supportedSubsystems
}

// setSupportedSubsystems sets cgroup resource limits only on the supported
// setSupportedSubsystemsV1 sets cgroup resource limits on cgroup v1 only on the supported
// subsystems. ie. cpu and memory. We don't use libcontainer's cgroup/fs/Set()
// method as it doesn't allow us to skip updates on the devices cgroup
// Allowing or denying all devices by writing 'a' to devices.allow or devices.deny is
Expand All @@ -347,7 +372,7 @@ func getSupportedSubsystems() map[subsystem]bool {
// We would like to skip setting any values on the device cgroup in this case
// but this is not possible with libcontainers Set() method
// See https://github.com/opencontainers/runc/issues/932
func setSupportedSubsystems(cgroupConfig *libcontainerconfigs.Cgroup) error {
func setSupportedSubsystemsV1(cgroupConfig *libcontainerconfigs.Cgroup) error {
for sys, required := range getSupportedSubsystems() {
if _, ok := cgroupConfig.Paths[sys.Name()]; !ok {
if required {
Expand Down Expand Up @@ -388,6 +413,106 @@ func getCpuMax(cpuQuota *int64, cpuPeriod *uint64) string {
return fmt.Sprintf("%s %s", quotaStr, periodStr)
}

// readUnifiedControllers reads the controllers available at the specified
// cgroup (the "cgroup.controllers" file under path) and returns them as a set.
func readUnifiedControllers(path string) (sets.String, error) {
	data, err := ioutil.ReadFile(filepath.Join(path, "cgroup.controllers"))
	if err != nil {
		return nil, err
	}
	return sets.NewString(strings.Fields(string(data))...), nil
}

var (
availableRootControllersOnce sync.Once
availableRootControllers sets.String
)

// getSupportedUnifiedControllers returns the set of controllers that are both
// used by the Kubelet and present in the root cgroup when running on cgroup v2.
func getSupportedUnifiedControllers() sets.String {
	// This is the set of controllers used by the Kubelet
	supportedControllers := sets.NewString("cpu", "cpuset", "memory", "hugetlb")
	if utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportPodPidsLimit) || utilfeature.DefaultFeatureGate.Enabled(kubefeatures.SupportNodePidsLimit) {
		supportedControllers.Insert("pids")
	}
	// Memoize the set of controllers that are present in the root cgroup
	availableRootControllersOnce.Do(func() {
		var err error
		availableRootControllers, err = readUnifiedControllers(cmutil.CgroupRoot)
		if err != nil {
			// Include the underlying error so the failure cause (permissions,
			// missing file, ...) is not lost in the panic message.
			panic(fmt.Errorf("cannot read cgroup controllers at %s: %v", cmutil.CgroupRoot, err))
		}
	})
	// Return the set of controllers that are supported both by the Kubelet and by the kernel
	return supportedControllers.Intersection(availableRootControllers)
}

// propagateControllers on an unified hierarchy enables all the supported controllers for the specified cgroup
func propagateControllers(path string) error {
if err := os.MkdirAll(path, 0755); err != nil {
return fmt.Errorf("failed to create cgroup %q : %v", path, err)
}

// Retrieve all the supported controllers from the cgroup root
controllersFileContent, err := ioutil.ReadFile(filepath.Join(cmutil.CgroupRoot, "cgroup.controllers"))
if err != nil {
return fmt.Errorf("failed to read controllers from %q : %v", cmutil.CgroupRoot, err)
}

supportedControllers := getSupportedUnifiedControllers()

// The retrieved content looks like: "cpuset cpu io memory hugetlb pids". Prepend each of the controllers
giuseppe marked this conversation as resolved.
Show resolved Hide resolved
// with '+', so we have something like "+cpuset +cpu +io +memory +hugetlb +pids"
controllers := ""
for _, controller := range strings.Fields(string(controllersFileContent)) {
// ignore controllers we don't care about
if !supportedControllers.Has(controller) {
continue
}

sep := " +"
if controllers == "" {
sep = "+"
}
controllers = controllers + sep + controller
}

current := cmutil.CgroupRoot
relPath, err := filepath.Rel(cmutil.CgroupRoot, path)
if err != nil {
return fmt.Errorf("failed to get relative path to cgroup root from %q: %v", path, err)
}
// Write the controllers list to each "cgroup.subtree_control" file until it reaches the parent cgroup.
// For the /foo/bar/baz cgroup, controllers must be enabled sequentially in the files:
// - /sys/fs/cgroup/foo/cgroup.subtree_control
// - /sys/fs/cgroup/foo/bar/cgroup.subtree_control
for _, p := range strings.Split(filepath.Dir(relPath), "/") {
current = filepath.Join(current, p)
if err := ioutil.WriteFile(filepath.Join(current, "cgroup.subtree_control"), []byte(controllers), 0755); err != nil {
return fmt.Errorf("failed to enable controllers on %q: %v", cmutil.CgroupRoot, err)
}
}
return nil
}

// setResourcesV2 sets cgroup resource limits on cgroup v2. It first makes sure
// the needed controllers are enabled on every ancestor cgroup, then applies the
// limits through a libcontainer cgroup v2 manager.
func setResourcesV2(cgroupConfig *libcontainerconfigs.Cgroup) error {
	if err := propagateControllers(cgroupConfig.Path); err != nil {
		return err
	}

	// Skip any device filtering: allow all devices, mirroring the v1 behavior
	// of not touching the devices controller.
	allowAll := true
	cgroupConfig.Resources.AllowAllDevices = &allowAll

	mgr, err := cgroupfs2.NewManager(cgroupConfig, cgroupConfig.Path, false)
	if err != nil {
		return fmt.Errorf("failed to create cgroup v2 manager: %v", err)
	}
	return mgr.Set(&libcontainerconfigs.Config{Cgroups: cgroupConfig})
}

func (m *cgroupManagerImpl) toResources(resourceConfig *ResourceConfig) *libcontainerconfigs.Resources {
resources := &libcontainerconfigs.Resources{}
if resourceConfig == nil {
Expand Down Expand Up @@ -454,12 +579,17 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
resourceConfig := cgroupConfig.ResourceParameters
resources := m.toResources(resourceConfig)

cgroupPaths := m.buildCgroupPaths(cgroupConfig.Name)

libcontainerCgroupConfig := &libcontainerconfigs.Cgroup{
Resources: resources,
Paths: cgroupPaths,
}

unified := libcontainercgroups.IsCgroup2UnifiedMode()
if unified {
libcontainerCgroupConfig.Path = m.buildCgroupUnifiedPath(cgroupConfig.Name)
} else {
libcontainerCgroupConfig.Paths = m.buildCgroupPaths(cgroupConfig.Name)
}

// libcontainer consumes a different field and expects a different syntax
// depending on the cgroup driver in use, so we need this conditional here.
if m.adapter.cgroupManagerType == libcontainerSystemd {
Expand All @@ -472,8 +602,14 @@ func (m *cgroupManagerImpl) Update(cgroupConfig *CgroupConfig) error {
libcontainerCgroupConfig.PidsLimit = *cgroupConfig.ResourceParameters.PidsLimit
}

if err := setSupportedSubsystems(libcontainerCgroupConfig); err != nil {
return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err)
if unified {
if err := setResourcesV2(libcontainerCgroupConfig); err != nil {
return fmt.Errorf("failed to set resources for cgroup %v: %v", cgroupConfig.Name, err)
}
} else {
if err := setSupportedSubsystemsV1(libcontainerCgroupConfig); err != nil {
return fmt.Errorf("failed to set supported cgroup subsystems for cgroup %v: %v", cgroupConfig.Name, err)
}
}
return nil
}
Expand Down Expand Up @@ -619,10 +755,25 @@ func toResourceStats(stats *libcontainercgroups.Stats) *ResourceStats {

// Get sets the ResourceParameters of the specified cgroup as read from the cgroup fs
func (m *cgroupManagerImpl) GetResourceStats(name CgroupName) (*ResourceStats, error) {
cgroupPaths := m.buildCgroupPaths(name)
stats, err := getStatsSupportedSubsystems(cgroupPaths)
if err != nil {
return nil, fmt.Errorf("failed to get stats supported cgroup subsystems for cgroup %v: %v", name, err)
var err error
var stats *libcontainercgroups.Stats
if libcontainercgroups.IsCgroup2UnifiedMode() {
cgroupPath := m.buildCgroupUnifiedPath(name)
manager, err := cgroupfs2.NewManager(nil, cgroupPath, false)
if err != nil {
return nil, fmt.Errorf("failed to create cgroup v2 manager: %v", err)
}

stats, err = manager.GetStats()
if err != nil {
return nil, fmt.Errorf("failed to get stats for cgroup %v: %v", name, err)
}
} else {
cgroupPaths := m.buildCgroupPaths(name)
stats, err = getStatsSupportedSubsystems(cgroupPaths)
if err != nil {
return nil, fmt.Errorf("failed to get stats supported cgroup subsystems for cgroup %v: %v", name, err)
}
}
return toResourceStats(stats), nil
}
1 change: 1 addition & 0 deletions pkg/kubelet/cm/container_manager_linux.go
Expand Up @@ -162,6 +162,7 @@ func validateSystemRequirements(mountUtil mount.Interface) (features, error) {
}

if cgroups.IsCgroup2UnifiedMode() {
f.cpuHardcapping = true
return f, nil
}

Expand Down
2 changes: 2 additions & 0 deletions test/e2e_node/BUILD
Expand Up @@ -21,6 +21,8 @@ go_library(
"util_sriov.go",
"util_xfs_linux.go",
"util_xfs_unsupported.go",
"utils_linux.go",
"utils_unsupported.go",
],
importpath = "k8s.io/kubernetes/test/e2e_node",
visibility = ["//visibility:public"],
Expand Down
9 changes: 7 additions & 2 deletions test/e2e_node/hugepages_test.go
Expand Up @@ -50,8 +50,13 @@ func makePodToVerifyHugePages(baseName string, hugePagesLimit resource.Quantity)
cgroupFsName = cgroupName.ToCgroupfs()
}

// this command takes the expected value and compares it against the actual value for the pod cgroup hugetlb.2MB.limit_in_bytes
command := fmt.Sprintf("expected=%v; actual=$(cat /tmp/hugetlb/%v/hugetlb.2MB.limit_in_bytes); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), cgroupFsName)
command := ""
// this command takes the expected value and compares it against the actual value for the pod cgroup hugetlb.2MB.<LIMIT>
if IsCgroup2UnifiedMode() {
command = fmt.Sprintf("expected=%v; actual=$(cat /tmp/%v/hugetlb.2MB.max); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), cgroupFsName)
} else {
command = fmt.Sprintf("expected=%v; actual=$(cat /tmp/hugetlb/%v/hugetlb.2MB.limit_in_bytes); if [ \"$expected\" -ne \"$actual\" ]; then exit 1; fi; ", hugePagesLimit.Value(), cgroupFsName)
}
framework.Logf("Pod to run command: %v", command)
pod := &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand Down