Skip to content

Commit

Permalink
Merge pull request kubernetes#117793 from tzneal/memory-oom-group-support
Browse files Browse the repository at this point in the history

use the cgroup aware OOM killer if available
  • Loading branch information
k8s-ci-robot committed Jun 12, 2023
2 parents 07646db + 4e20a8f commit 86d7860
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 11 deletions.
9 changes: 9 additions & 0 deletions pkg/kubelet/kuberuntime/kuberuntime_container_linux.go
Expand Up @@ -215,6 +215,15 @@ func (m *kubeGenericRuntimeManager) calculateLinuxResources(cpuRequest, cpuLimit
resources.CpuPeriod = cpuPeriod
}

// runc requires cgroupv2 for unified mode
if libcontainercgroups.IsCgroup2UnifiedMode() {
resources.Unified = map[string]string{
// Ask the kernel to kill all processes in the container cgroup in case of OOM.
// See memory.oom.group in https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html for
// more info.
"memory.oom.group": "1",
}
}
return &resources
}

Expand Down
Expand Up @@ -19,7 +19,6 @@ package e2enode
import (
"context"
"fmt"
"time"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
Expand All @@ -29,35 +28,52 @@ import (
admissionapi "k8s.io/pod-security-admission/api"

"github.com/onsi/ginkgo/v2"
libcontainercgroups "github.com/opencontainers/runc/libcontainer/cgroups"
)

// testCase describes one OOM killer scenario that is handed to runOomKillerTest.
type testCase struct {
// name is used as the description of the ginkgo.Context that wraps the scenario.
name string
// podSpec is the pod the test creates and then waits on until it terminates.
podSpec *v1.Pod
// oomTargetContainerName names the container within podSpec that the scenario targets;
// presumably this is the container whose termination reason is checked for OOMKilled.
oomTargetContainerName string
}

// PodOOMKilledTimeout is a two-minute duration.
// NOTE(review): nothing in the visible code references this constant; presumably it bounded the wait for the
// pod to be OOM killed. Confirm it is still needed.
const PodOOMKilledTimeout = 2 * time.Minute

// Registers the OOMKiller node e2e suite: one ginkgo context per scenario, each set up by runOomKillerTest.
var _ = SIGDescribe("OOMKiller [LinuxOnly] [NodeConformance]", func() {
	f := framework.NewDefaultFramework("oomkiller-test")
	f.NamespacePodSecurityEnforceLevel = admissionapi.LevelPrivileged

	// The single process scenario is registered regardless of the cgroup version.
	testCases := []testCase{{
		name:                   "single process container",
		oomTargetContainerName: "oomkill-single-target-container",
		podSpec: getOOMTargetPod("oomkill-target-pod", "oomkill-single-target-container",
			getOOMTargetContainer),
	}}

	// With cgroup v2, memory.oom.group=1 is set on the container cgroup, so when any process in the container
	// is OOM killed, all processes in that container are killed with it. The multi process scenario is
	// therefore only registered in unified mode.
	if libcontainercgroups.IsCgroup2UnifiedMode() {
		testCases = append(testCases, testCase{
			name:                   "multi process container",
			oomTargetContainerName: "oomkill-multi-target-container",
			podSpec: getOOMTargetPod("oomkill-target-pod", "oomkill-multi-target-container",
				getOOMTargetContainerMultiProcess),
		})
	}

	for _, tc := range testCases {
		runOomKillerTest(f, tc)
	}
})

func runOomKillerTest(f *framework.Framework, testCase testCase) {
ginkgo.Context("", func() {
ginkgo.Context(testCase.name, func() {
ginkgo.BeforeEach(func() {
ginkgo.By("setting up the pod to be used in the test")
e2epod.NewPodClient(f).Create(context.TODO(), testCase.podSpec)
})

ginkgo.It("The containers terminated by OOM killer should have the reason set to OOMKilled", func() {

ginkgo.By("Waiting for the pod to be failed")
e2epod.WaitForPodTerminatedInNamespace(context.TODO(), f.ClientSet, testCase.podSpec.Name, "", f.Namespace.Name)
err := e2epod.WaitForPodTerminatedInNamespace(context.TODO(), f.ClientSet, testCase.podSpec.Name, "", f.Namespace.Name)
framework.ExpectNoError(err, "Failed waiting for pod to terminate, %s/%s", f.Namespace.Name, testCase.podSpec.Name)

ginkgo.By("Fetching the latest pod status")
pod, err := f.ClientSet.CoreV1().Pods(f.Namespace.Name).Get(context.TODO(), testCase.podSpec.Name, metav1.GetOptions{})
Expand Down Expand Up @@ -88,20 +104,22 @@ func verifyReasonForOOMKilledContainer(pod *v1.Pod, oomTargetContainerName strin
fmt.Sprintf("pod: %q, container: %q has unexpected reason: %q", pod.Name, container.Name, container.State.Terminated.Reason))
}

func getOOMTargetPod(podName string, ctnName string) *v1.Pod {
func getOOMTargetPod(podName string, ctnName string, createContainer func(name string) v1.Container) *v1.Pod {
return &v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Name: podName,
},
Spec: v1.PodSpec{
RestartPolicy: v1.RestartPolicyNever,
Containers: []v1.Container{
getOOMTargetContainer(ctnName),
createContainer(ctnName),
},
},
}
}

// getOOMTargetContainer returns a container with a single process, which attempts to allocate more memory than is
// allowed by the container memory limit.
func getOOMTargetContainer(name string) v1.Container {
return v1.Container{
Name: name,
Expand All @@ -122,3 +140,26 @@ func getOOMTargetContainer(name string) v1.Container {
},
}
}

// getOOMTargetContainerMultiProcess builds a busybox container that runs two processes under a 15Mi memory
// request and limit: a backgrounded dd that tries to use a 20M block (more than the limit allows), and a
// foreground sleep that does nothing but stay alive.
func getOOMTargetContainerMultiProcess(name string) v1.Container {
	// dd is asked for a single 20M block, which exceeds the memory limit set below; the trailing sleep is the
	// second process in the container.
	const script = "dd if=/dev/zero of=/dev/null bs=20M & sleep 86400"

	// Requests and limits are identical, so both are built from the same helper.
	memory := func() v1.ResourceList {
		return v1.ResourceList{v1.ResourceMemory: resource.MustParse("15Mi")}
	}

	ctr := v1.Container{Name: name, Image: busyboxImage}
	ctr.Command = []string{"sh", "-c", script}
	ctr.Resources = v1.ResourceRequirements{
		Requests: memory(),
		Limits:   memory(),
	}
	return ctr
}

0 comments on commit 86d7860

Please sign in to comment.