Map a resource to multiple signals in eviction manager #47080

Merged: 1 commit, Sep 20, 2017
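What changed, in brief: the eviction manager previously kept a one-to-one resourceToSignal map, so an amount reclaimed for a resource could be credited back to only a single signal. This PR replaces that map with resourceClaimToSignal, which maps each reclaimable resource to a list of signals. A minimal sketch of the type change, using the identifiers from the diffs below:

// Before: each resource maps to exactly one signal.
var resourceToSignal map[v1.ResourceName]evictionapi.Signal

// After: a reclaimable resource can fan out to any number of signals.
var resourceClaimToSignal map[v1.ResourceName][]evictionapi.Signal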
14 changes: 7 additions & 7 deletions pkg/kubelet/eviction/eviction_manager.go
@@ -435,14 +435,14 @@ func (m *managerImpl) reclaimNodeLevelResources(resourceToReclaim v1.ResourceName
}
// update our local observations based on the amount reported to have been reclaimed.
// note: this is optimistic, other things could have been still consuming the pressured resource in the interim.
- signal := resourceToSignal[resourceToReclaim]
- value, ok := observations[signal]
- if !ok {
- glog.Errorf("eviction manager: unable to find value associated with signal %v", signal)
- continue
- }
- value.available.Add(*reclaimed)
+ for _, signal := range resourceClaimToSignal[resourceToReclaim] {
+ value, ok := observations[signal]
+ if !ok {
+ glog.Errorf("eviction manager: unable to find value associated with signal %v", signal)
+ continue
+ }
+ value.available.Add(*reclaimed)
+ }

// evaluate all current thresholds to see if with adjusted observations, we think we have met min reclaim goals
if len(thresholdsMet(m.thresholdsMet, observations, true)) == 0 {
return true
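To make the fan-out concrete, here is a self-contained Go sketch of the reclaim-crediting loop above. The two-signal mapping and the plain int64 quantities are simplifications for illustration only; the kubelet's real code uses v1.ResourceName, evictionapi.Signal, and resource.Quantity:

package main

import "fmt"

type Signal string
type ResourceName string

// signalObservation loosely models the eviction manager's per-signal
// observation (the real type wraps a resource.Quantity).
type signalObservation struct {
	available int64
}

func main() {
	// A reclaimable resource may now map to more than one signal.
	// This two-signal mapping is hypothetical, purely for illustration.
	resourceClaimToSignal := map[ResourceName][]Signal{
		"nodefs": {"nodefs.available", "allocatableNodeFs.available"},
	}
	observations := map[Signal]*signalObservation{
		"nodefs.available":            {available: 1 << 30}, // 1Gi observed free
		"allocatableNodeFs.available": {available: 2 << 30}, // 2Gi observed free
	}
	reclaimed := int64(500 << 20) // pretend 500Mi were just reclaimed

	// Credit the reclaimed amount to every signal mapped to the resource,
	// mirroring the for-loop introduced in the diff above.
	for _, signal := range resourceClaimToSignal["nodefs"] {
		value, ok := observations[signal]
		if !ok {
			continue // the real code logs an error here and moves on
		}
		value.available += reclaimed
	}

	for signal, value := range observations {
		fmt.Printf("%s: %d bytes available\n", signal, value.available)
	}
}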
292 changes: 292 additions & 0 deletions pkg/kubelet/eviction/eviction_manager_test.go
@@ -1428,3 +1428,295 @@ func TestAllocatableMemoryPressure(t *testing.T) {
}
}
}

// TestAllocatableNodeFsPressure verifies the eviction manager's behavior under allocatable nodefs pressure.
func TestAllocatableNodeFsPressure(t *testing.T) {
podMaker := makePodWithDiskStats
summaryStatsMaker := makeDiskStats

podsToMake := []podToMake{
{name: "guaranteed-low", requests: newEphemeralStorageResourceList("200Mi", "100m", "1Gi"), limits: newEphemeralStorageResourceList("200Mi", "100m", "1Gi"), rootFsUsed: "200Mi"},
{name: "guaranteed-high", requests: newEphemeralStorageResourceList("800Mi", "100m", "1Gi"), limits: newEphemeralStorageResourceList("800Mi", "100m", "1Gi"), rootFsUsed: "800Mi"},
{name: "burstable-low", requests: newEphemeralStorageResourceList("300Mi", "100m", "100Mi"), limits: newEphemeralStorageResourceList("300Mi", "200m", "1Gi"), logsFsUsed: "300Mi"},
{name: "burstable-high", requests: newEphemeralStorageResourceList("800Mi", "100m", "100Mi"), limits: newEphemeralStorageResourceList("800Mi", "200m", "1Gi"), rootFsUsed: "800Mi"},
{name: "best-effort-low", requests: newEphemeralStorageResourceList("300Mi", "", ""), limits: newEphemeralStorageResourceList("300Mi", "", ""), logsFsUsed: "300Mi"},
{name: "best-effort-high", requests: newEphemeralStorageResourceList("800Mi", "", ""), limits: newEphemeralStorageResourceList("800Mi", "", ""), rootFsUsed: "800Mi"},
}
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
pods = append(pods, pod)
podStats[pod] = podStat
}
podToEvict := pods[5]
activePodsFunc := func() []*v1.Pod {
return pods
}

fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(newEphemeralStorageResourceList("6Gi", "1000m", "10Gi"), newEphemeralStorageResourceList("1Gi", "1000m", "10Gi"))
diskGC := &mockDiskGC{imageBytesFreed: int64(0), err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalAllocatableNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("1Ki"),
},
},
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("3Gi", "6Gi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
}

// create a best-effort and a burstable pod to test admission
bestEffortPodToAdmit, _ := podMaker("best-admit", newEphemeralStorageResourceList("", "", ""), newEphemeralStorageResourceList("", "", ""), "0Gi", "", "")
burstablePodToAdmit, _ := podMaker("burst-admit", newEphemeralStorageResourceList("1Gi", "", ""), newEphemeralStorageResourceList("1Gi", "", ""), "1Gi", "", "")

// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)

// we should not have disk pressure
if manager.IsUnderDiskPressure() {
t.Fatalf("Manager should not report disk pressure")
}

// try to admit our pods (they should succeed)
expected := []bool{true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
}

// induce disk pressure!
fakeClock.Step(1 * time.Minute)
pod, podStat := podMaker("guaranteed-high-2", newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), "2000Mi", "", "")
podStats[pod] = podStat
pods = append(pods, pod)
summaryProvider.result = summaryStatsMaker("6Gi", "6Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)

// we should have disk pressure
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure")
}

// check the right pod was killed
if podKiller.pod != podToEvict {
t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, podToEvict.Name)
}
observedGracePeriod := *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
}
// reset state
podKiller.pod = nil
podKiller.gracePeriodOverride = nil

// try to admit our pods (they should fail)
expected = []bool{false, false}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
}

// reduce disk pressure
fakeClock.Step(1 * time.Minute)
pods[5] = pods[len(pods)-1]
pods = pods[:len(pods)-1]

// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure")
}

// try to admit our pods (they should fail)
expected = []bool{false, false}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
}

// move the clock past transition period to ensure that we stop reporting pressure
fakeClock.Step(5 * time.Minute)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)

// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
t.Fatalf("Manager should not report disk pressure")
}

// no pod should have been killed
if podKiller.pod != nil {
t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
}

// all pods should admit now
expected = []bool{true, true}
for i, pod := range []*v1.Pod{bestEffortPodToAdmit, burstablePodToAdmit} {
if result := manager.Admit(&lifecycle.PodAdmitAttributes{Pod: pod}); expected[i] != result.Admit {
t.Fatalf("Admit pod: %v, expected: %v, actual: %v", pod, expected[i], result.Admit)
}
}
}

func TestNodeReclaimForAllocatableFuncs(t *testing.T) {
podMaker := makePodWithDiskStats
summaryStatsMaker := makeDiskStats
podsToMake := []podToMake{
{name: "guaranteed-low", requests: newEphemeralStorageResourceList("200Mi", "100m", "1Gi"), limits: newEphemeralStorageResourceList("200Mi", "100m", "1Gi"), rootFsUsed: "200Mi"},
{name: "guaranteed-high", requests: newEphemeralStorageResourceList("800Mi", "100m", "1Gi"), limits: newEphemeralStorageResourceList("800Mi", "100m", "1Gi"), rootFsUsed: "800Mi"},
{name: "burstable-low", requests: newEphemeralStorageResourceList("300Mi", "100m", "100Mi"), limits: newEphemeralStorageResourceList("300Mi", "200m", "1Gi"), logsFsUsed: "300Mi"},
{name: "burstable-high", requests: newEphemeralStorageResourceList("800Mi", "100m", "100Mi"), limits: newEphemeralStorageResourceList("800Mi", "200m", "1Gi"), rootFsUsed: "800Mi"},
{name: "best-effort-low", requests: newEphemeralStorageResourceList("300Mi", "", ""), limits: newEphemeralStorageResourceList("300Mi", "", ""), logsFsUsed: "300Mi"},
{name: "best-effort-high", requests: newEphemeralStorageResourceList("800Mi", "", ""), limits: newEphemeralStorageResourceList("800Mi", "", ""), rootFsUsed: "800Mi"},
}
pods := []*v1.Pod{}
podStats := map[*v1.Pod]statsapi.PodStats{}
for _, podToMake := range podsToMake {
pod, podStat := podMaker(podToMake.name, podToMake.requests, podToMake.limits, podToMake.rootFsUsed, podToMake.logsFsUsed, podToMake.perLocalVolumeUsed)
pods = append(pods, pod)
podStats[pod] = podStat
}
podToEvict := pods[5]
activePodsFunc := func() []*v1.Pod {
return pods
}

fakeClock := clock.NewFakeClock(time.Now())
podKiller := &mockPodKiller{}
diskInfoProvider := &mockDiskInfoProvider{dedicatedImageFs: false}
capacityProvider := newMockCapacityProvider(newEphemeralStorageResourceList("6Gi", "1000m", "10Gi"), newEphemeralStorageResourceList("1Gi", "1000m", "10Gi"))
imageGcFree := resource.MustParse("800Mi")
diskGC := &mockDiskGC{imageBytesFreed: imageGcFree.Value(), err: nil}
nodeRef := &v1.ObjectReference{Kind: "Node", Name: "test", UID: types.UID("test"), Namespace: ""}

config := Config{
MaxPodGracePeriodSeconds: 5,
PressureTransitionPeriod: time.Minute * 5,
Thresholds: []evictionapi.Threshold{
{
Signal: evictionapi.SignalAllocatableNodeFsAvailable,
Operator: evictionapi.OpLessThan,
Value: evictionapi.ThresholdValue{
Quantity: quantityMustParse("10Mi"),
},
},
},
}
summaryProvider := &fakeSummaryProvider{result: summaryStatsMaker("6Gi", "6Gi", podStats)}
manager := &managerImpl{
clock: fakeClock,
killPodFunc: podKiller.killPodNow,
imageGC: diskGC,
containerGC: diskGC,
config: config,
recorder: &record.FakeRecorder{},
summaryProvider: summaryProvider,
nodeRef: nodeRef,
nodeConditionsLastObservedAt: nodeConditionsObservedAt{},
thresholdsFirstObservedAt: thresholdsObservedAt{},
}

// synchronize
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)

// we should not have disk pressure
if manager.IsUnderDiskPressure() {
t.Errorf("Manager should not report disk pressure")
}

// induce hard threshold
fakeClock.Step(1 * time.Minute)

pod, podStat := podMaker("guaranteed-high-2", newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), newEphemeralStorageResourceList("2000Mi", "100m", "1Gi"), "2000Mi", "", "")
podStats[pod] = podStat
pods = append(pods, pod)
summaryProvider.result = summaryStatsMaker("6Gi", "6Gi", podStats)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)

// we should have disk pressure
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure since soft threshold was met")
}

// verify image and container gc were invoked
if !diskGC.imageGCInvoked || !diskGC.containerGCInvoked {
t.Fatalf("Manager should have invoked image and container gc")
}

// verify a pod was killed because image and container gc were not sufficient
if podKiller.pod == nil {
t.Fatalf("Manager should have killed a pod, but did not")
}
// check the right pod was killed
if podKiller.pod != podToEvict {
t.Fatalf("Manager chose to kill pod: %v, but should have chosen %v", podKiller.pod, podToEvict.Name)
}
observedGracePeriod := *podKiller.gracePeriodOverride
if observedGracePeriod != int64(0) {
t.Fatalf("Manager chose to kill pod with incorrect grace period. Expected: %d, actual: %d", 0, observedGracePeriod)
}

// reset state
diskGC.imageGCInvoked = false
diskGC.containerGCInvoked = false
podKiller.pod = nil
podKiller.gracePeriodOverride = nil

// reduce disk pressure
fakeClock.Step(1 * time.Minute)
pods[5] = pods[len(pods)-1]
pods = pods[:len(pods)-1]

// we should have disk pressure (because transition period not yet met)
if !manager.IsUnderDiskPressure() {
t.Fatalf("Manager should report disk pressure")
}

// move the clock past transition period to ensure that we stop reporting pressure
fakeClock.Step(5 * time.Minute)
manager.synchronize(diskInfoProvider, activePodsFunc, capacityProvider)

// we should not have disk pressure (because transition period met)
if manager.IsUnderDiskPressure() {
t.Fatalf("Manager should not report disk pressure")
}

// no pod should have been killed
if podKiller.pod != nil {
t.Fatalf("Manager chose to kill pod: %v when no pod should have been killed", podKiller.pod.Name)
}

// no image or container gc should have occurred
if diskGC.imageGCInvoked || diskGC.containerGCInvoked {
t.Errorf("Manager chose to perform image or container gc when it was not needed")
}
}
17 changes: 8 additions & 9 deletions pkg/kubelet/eviction/helpers.go
@@ -61,8 +61,8 @@
signalToNodeCondition map[evictionapi.Signal]v1.NodeConditionType
// signalToResource maps a Signal to its associated Resource.
signalToResource map[evictionapi.Signal]v1.ResourceName
- // resourceToSignal maps a Resource to its associated Signal
- resourceToSignal map[v1.ResourceName]evictionapi.Signal
+ // resourceClaimToSignal maps a Resource that can be reclaimed to its associated Signals
+ resourceClaimToSignal map[v1.ResourceName][]evictionapi.Signal
)

@@ -86,13 +86,12 @@ func init() {
signalToResource[evictionapi.SignalNodeFsAvailable] = resourceNodeFs
signalToResource[evictionapi.SignalNodeFsInodesFree] = resourceNodeFsInodes

- resourceToSignal = map[v1.ResourceName]evictionapi.Signal{}
- for key, value := range signalToResource {
- resourceToSignal[value] = key
- }
- // Hard-code here to make sure resourceNodeFs maps to evictionapi.SignalNodeFsAvailable
- // (TODO) resourceToSignal is a map from resource name to a list of signals
- resourceToSignal[resourceNodeFs] = evictionapi.SignalNodeFsAvailable
+ // maps resource to signals (the following resources can be reclaimed)
+ resourceClaimToSignal = map[v1.ResourceName][]evictionapi.Signal{}
+ resourceClaimToSignal[resourceNodeFs] = []evictionapi.Signal{evictionapi.SignalNodeFsAvailable}
+ resourceClaimToSignal[resourceImageFs] = []evictionapi.Signal{evictionapi.SignalImageFsAvailable}
+ resourceClaimToSignal[resourceNodeFsInodes] = []evictionapi.Signal{evictionapi.SignalNodeFsInodesFree}
+ resourceClaimToSignal[resourceImageFsInodes] = []evictionapi.Signal{evictionapi.SignalImageFsInodesFree}
}

// validSignal returns true if the signal is supported.
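A note on the design: the old code built resourceToSignal by inverting signalToResource, which can keep only one signal per resource when several signals share the same resource (hence the old hard-coded override for resourceNodeFs). Enumerating the reclaimable resources explicitly avoids that, and a future multi-signal entry only needs to append to the slice. For example (a hypothetical entry, not part of this PR):

// Hypothetical: if allocatable nodefs also became reclaimable,
// the entry would simply list both signals.
resourceClaimToSignal[resourceNodeFs] = []evictionapi.Signal{
	evictionapi.SignalNodeFsAvailable,
	evictionapi.SignalAllocatableNodeFsAvailable,
}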
14 changes: 14 additions & 0 deletions pkg/kubelet/eviction/helpers_test.go
@@ -1604,6 +1604,20 @@ func newResourceList(cpu, memory string) v1.ResourceList {
return res
}

func newEphemeralStorageResourceList(ephemeral, cpu, memory string) v1.ResourceList {
res := v1.ResourceList{}
if ephemeral != "" {
res[v1.ResourceEphemeralStorage] = resource.MustParse(ephemeral)
}
if cpu != "" {
res[v1.ResourceCPU] = resource.MustParse(cpu)
}
if memory != "" {
res[v1.ResourceMemory] = resource.MustParse(memory)
}
return res
}
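For reference, the tests above call this helper with values from their pod tables; an empty string skips the corresponding resource (a usage sketch):

// guaranteed pod: explicit ephemeral-storage, CPU, and memory requests
requests := newEphemeralStorageResourceList("200Mi", "100m", "1Gi")
// best-effort pod: only ephemeral-storage is set; CPU and memory entries are omitted
bestEffort := newEphemeralStorageResourceList("300Mi", "", "")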

func newResourceRequirements(requests, limits v1.ResourceList) v1.ResourceRequirements {
res := v1.ResourceRequirements{}
res.Requests = requests