Fix grace period override used for immediate evictions in eviction manager #119570

Closed
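What the change does, in brief: evictions that are meant to be immediate (hard-threshold evictions and the emptyDir / ephemeral-storage limit evictions) previously passed a gracePeriodOverride of 0 to the pod killer; they now pass a named 1-second constant, while soft-threshold evictions keep using the kubelet's configured MaxPodGracePeriodSeconds. The sketch below is a consolidated, simplified view of that selection logic as it reads after this patch; the standalone helper form and its name are illustrative, not part of the actual code.

// Sketch only; the real logic lives inline in pkg/kubelet/eviction/eviction_manager.go.
package eviction

const immediateEvictionGracePeriodSeconds = 1

// gracePeriodForEviction (hypothetical helper) shows how the override is now chosen:
// 1 second for immediate (hard-threshold) evictions, the configured
// --eviction-max-pod-grace-period for soft-threshold evictions.
func gracePeriodForEviction(isHardThreshold bool, maxPodGracePeriodSeconds int64) int64 {
	gracePeriodOverride := int64(immediateEvictionGracePeriodSeconds)
	if !isHardThreshold {
		gracePeriodOverride = maxPodGracePeriodSeconds
	}
	return gracePeriodOverride
}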
12 changes: 8 additions & 4 deletions pkg/kubelet/eviction/eviction_manager.go
@@ -58,6 +58,10 @@ const (
signalEmptyDirFsLimit string = "emptydirfs.limit"
)

const (
immediateEvictionGracePeriodSeconds = 1
)

// managerImpl implements Manager
type managerImpl struct {
// used to track time
@@ -381,7 +385,7 @@ func (m *managerImpl) synchronize(diskInfoProvider DiskInfoProvider, podFunc Act
// we kill at most a single pod during each eviction interval
for i := range activePods {
pod := activePods[i]
gracePeriodOverride := int64(0)
gracePeriodOverride := int64(immediateEvictionGracePeriodSeconds)
if !isHardEvictionThreshold(thresholdToReclaim) {
gracePeriodOverride = m.config.MaxPodGracePeriodSeconds
}
@@ -501,7 +505,7 @@ func (m *managerImpl) emptyDirLimitEviction(podStats statsapi.PodStats, pod *v1.
used := podVolumeUsed[pod.Spec.Volumes[i].Name]
if used != nil && size != nil && size.Sign() == 1 && used.Cmp(*size) > 0 {
// the emptyDir usage exceeds the size limit, evict the pod
if m.evictPod(pod, 0, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil, nil) {
if m.evictPod(pod, immediateEvictionGracePeriodSeconds, fmt.Sprintf(emptyDirMessageFmt, pod.Spec.Volumes[i].Name, size.String()), nil, nil) {
metrics.Evictions.WithLabelValues(signalEmptyDirFsLimit).Inc()
return true
}
@@ -529,7 +533,7 @@ func (m *managerImpl) podEphemeralStorageLimitEviction(podStats statsapi.PodStat
if podEphemeralStorageTotalUsage.Cmp(podEphemeralStorageLimit) > 0 {
// the total usage of pod exceeds the total size limit of containers, evict the pod
message := fmt.Sprintf(podEphemeralStorageMessageFmt, podEphemeralStorageLimit.String())
if m.evictPod(pod, 0, message, nil, nil) {
if m.evictPod(pod, immediateEvictionGracePeriodSeconds, message, nil, nil) {
metrics.Evictions.WithLabelValues(signalEphemeralPodFsLimit).Inc()
return true
}
@@ -555,7 +559,7 @@ func (m *managerImpl) containerEphemeralStorageLimitEviction(podStats statsapi.P

if ephemeralStorageThreshold, ok := thresholdsMap[containerStat.Name]; ok {
if ephemeralStorageThreshold.Cmp(*containerUsed) < 0 {
if m.evictPod(pod, 0, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil, nil) {
if m.evictPod(pod, immediateEvictionGracePeriodSeconds, fmt.Sprintf(containerEphemeralStorageMessageFmt, containerStat.Name, ephemeralStorageThreshold.String()), nil, nil) {
metrics.Evictions.WithLabelValues(signalEphemeralContainerFsLimit).Inc()
return true
}
127 changes: 105 additions & 22 deletions pkg/kubelet/eviction/eviction_manager_test.go
@@ -109,10 +109,11 @@ func makePodWithMemoryStats(name string, priority int32, requests v1.ResourceLis
return pod, podStats
}

func makePodWithDiskStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string) (*v1.Pod, statsapi.PodStats) {
func makePodWithDiskStats(name string, priority int32, requests v1.ResourceList, limits v1.ResourceList, rootFsUsed, logsUsed, perLocalVolumeUsed string, volumes []v1.Volume) (*v1.Pod, statsapi.PodStats) {
pod := newPod(name, priority, []v1.Container{
newContainer(name, requests, limits),
}, nil)
}, volumes)

podStats := newPodDiskStats(pod, parseQuantity(rootFsUsed), parseQuantity(logsUsed), parseQuantity(perLocalVolumeUsed))
return pod, podStats
}
@@ -294,7 +295,7 @@ func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
wantPodStatus: v1.PodStatus{
Phase: v1.PodFailed,
Reason: "Evicted",
Message: "The node was low on resource: ephemeral-storage. Threshold quantity: 2Gi, available: 1536Mi. ",
Message: "The node was low on resource: ephemeral-storage. Threshold quantity: 2Gi, available: 1536Mi. Container above-requests was using 700Mi, request is 100Mi, has larger consumption of ephemeral-storage. ",
Author comment: The messages here changed because the test helper previously did not set the container name on the ContainerStats objects, so the per-container ephemeral-storage usage was not picked up in the eviction message (see the helpers_test.go change at the bottom of this diff and the sketch after this hunk).

},
},
}
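The comment above refers to the helpers_test.go fix at the bottom of this diff. As a quick reference at this point, a minimal sketch of the fixed stats loop (imports and the rest of newPodDiskStats elided; only the fields visible in the diff are shown):

// Setting Name on each ContainerStats is what lets the eviction message
// attribute ephemeral-storage usage to a specific container.
for _, container := range pod.Spec.Containers {
	result.Containers = append(result.Containers, statsapi.ContainerStats{
		Name: container.Name,
		Rootfs: &statsapi.FsStats{
			UsedBytes: &rootFsUsedBytes,
		},
	})
}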
@@ -312,7 +313,7 @@ func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed, nil)
pods = append(pods, pod)
podStats[pod] = podStat
}
@@ -371,7 +372,7 @@ func TestDiskPressureNodeFs_VerifyPodStatus(t *testing.T) {
Type: "DisruptionTarget",
Status: "True",
Reason: "TerminationByKubelet",
Message: "The node was low on resource: ephemeral-storage. Threshold quantity: 2Gi, available: 1536Mi. ",
Message: "The node was low on resource: ephemeral-storage. Threshold quantity: 2Gi, available: 1536Mi. Container above-requests was using 700Mi, request is 100Mi, has larger consumption of ephemeral-storage. ",
})
}

@@ -534,8 +535,8 @@ func TestMemoryPressure(t *testing.T) {
t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
}
observedGracePeriod = *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
if observedGracePeriod != int64(1) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 1, observedGracePeriod)
}

// the best-effort pod should not admit, burstable should
@@ -662,7 +663,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed, nil)
pods = append(pods, pod)
podStats[pod] = podStat
}
@@ -713,7 +714,7 @@ func TestDiskPressureNodeFs(t *testing.T) {
}

// create a best effort pod to test admission
podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi")
podToAdmit, _ := podMaker("pod-to-admit", defaultPriority, newResourceList("", "", ""), newResourceList("", "", ""), "0Gi", "0Gi", "0Gi", nil)

// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc)
@@ -793,8 +794,8 @@ func TestDiskPressureNodeFs(t *testing.T) {
t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
}
observedGracePeriod = *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
if observedGracePeriod != int64(1) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 1, observedGracePeriod)
}

// try to admit our pod (should fail)
@@ -927,8 +928,8 @@ func TestMinReclaim(t *testing.T) {
t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
}
observedGracePeriod := *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
if observedGracePeriod != int64(1) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 1, observedGracePeriod)
}

// reduce memory pressure, but not below the min-reclaim amount
@@ -947,8 +948,8 @@ func TestMinReclaim(t *testing.T) {
t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
}
observedGracePeriod = *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
if observedGracePeriod != int64(1) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 1, observedGracePeriod)
}

// reduce memory pressure and ensure the min-reclaim amount
@@ -997,7 +998,7 @@ func TestNodeReclaimFuncs(t *testing.T) {
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed, nil)
pods = append(pods, pod)
podStats[pod] = podStat
}
@@ -1153,8 +1154,8 @@ func TestNodeReclaimFuncs(t *testing.T) {
t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
}
observedGracePeriod := *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
if observedGracePeriod != int64(1) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 1, observedGracePeriod)
}

// reduce disk pressure
@@ -1372,8 +1373,8 @@ func TestInodePressureNodeFsInodes(t *testing.T) {
t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
}
observedGracePeriod = *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
if observedGracePeriod != int64(1) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 1, observedGracePeriod)
}

// try to admit our pod (should fail)
@@ -1554,6 +1555,88 @@ func TestStaticCriticalPodsAreNotEvicted(t *testing.T) {
}
}

func TestStorageLimitEvictions(t *testing.T) {
volumeSizeLimit := resource.MustParse("1Gi")

testCases := map[string]struct {
pod podToMake
volumes []v1.Volume
}{
"eviction due to rootfs above limit": {
pod: podToMake{name: "rootfs-above-limits", priority: scheduling.DefaultPriorityWhenNoDefaultClassExists, requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), rootFsUsed: "2Gi"},
},
"eviction due to logsfs above limit": {
pod: podToMake{name: "logsfs-above-limits", priority: scheduling.DefaultPriorityWhenNoDefaultClassExists, requests: newResourceList("", "", "1Gi"), limits: newResourceList("", "", "1Gi"), logsFsUsed: "2Gi"},
},
"eviction due to local volume above limit": {
pod: podToMake{name: "localvolume-above-limits", priority: scheduling.DefaultPriorityWhenNoDefaultClassExists, requests: newResourceList("", "", ""), limits: newResourceList("", "", ""), perLocalVolumeUsed: "2Gi"},
volumes: []v1.Volume{{
Name: "emptyDirVolume",
VolumeSource: v1.VolumeSource{
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: &volumeSizeLimit,
},
},
}},
},
}
for name, tc := range testCases {
t.Run(name, func(t *testing.T) {
podMaker := makePodWithDiskStats
summaryStatsMaker := makeMemoryStats
podsToMake := []podToMake{
tc.pod,
}
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.priority, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed, tc.volumes)
pods = append(pods, pod)
podStats[pod] = podStat
}

podToEvict := pods[0]
activePodsFunc := func() []*v1.Pod {
return pods
}

fakeClock := testingclock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
diskGC := &mockDiskGC{err: nil}
nodeRef := &v1.ObjectReference{
Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: "",
}

config := Config{}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("2Gi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
localStorageCapacityIsolation: true,
}

manager.synchronize(diskInfoProvider, activePodsFunc)

// verify the right pod was killed with the right grace period.
if podKiller.pod != podToEvict {
t.Errorf("Manager should have killed pod: %v, but instead killed: %v", podToEvict.Name, podKiller.pod.Name)
}
if *podKiller.gracePeriodOverride != 1 {
t.Errorf("Manager should have evicted with gracePeriodOverride of 1, but used: %v", *podKiller.gracePeriodOverride)
}
})
}
}
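The assertions above rely on the test's mockPodKiller recording whatever the eviction manager passes to killPodFunc. A sketch of such a recorder, assuming a KillPodFunc signature of (pod, isEvicted, gracePeriodOverride, statusFn); the real mock is defined earlier in eviction_manager_test.go and may differ in detail:

// mockPodKiller captures the kill request so tests can assert on the pod and
// the grace period override (sketch; fields beyond these are elided).
type mockPodKiller struct {
	pod                 *v1.Pod
	evict               bool
	gracePeriodOverride *int64
}

func (m *mockPodKiller) killPodNow(pod *v1.Pod, evict bool, gracePeriodOverride *int64, _ func(*v1.PodStatus)) error {
	m.pod = pod
	m.evict = evict
	m.gracePeriodOverride = gracePeriodOverride
	return nil
}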

// TestAllocatableMemoryPressure
func TestAllocatableMemoryPressure(t *testing.T) {
podMaker := makePodWithMemoryStats
@@ -1647,8 +1730,8 @@ func TestAllocatableMemoryPressure(t *testing.T) {
t.Errorf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod.Name, podToEvict.Name)
}
observedGracePeriod := *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
if observedGracePeriod != int64(1) {
t.Errorf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 1, observedGracePeriod)
}
// reset state
podKiller.pod = nil
3 changes: 2 additions & 1 deletion pkg/kubelet/eviction/helpers_test.go
@@ -1996,8 +1996,9 @@ func newPodDiskStats(pod *v1.Pod, rootFsUsed, logsUsed, perLocalVolumeUsed resou

rootFsUsedBytes := uint64(rootFsUsed.Value())
logsUsedBytes := uint64(logsUsed.Value())
for range pod.Spec.Containers {
for _, container := range pod.Spec.Containers {
result.Containers = append(result.Containers, statsapi.ContainerStats{
Name: container.Name,
Rootfs: &statsapi.FsStats{
UsedBytes: &rootFsUsedBytes,
},