Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #93722: Do not evict pods which tolerate all NoExecute taints #93814

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/controller/nodelifecycle/scheduler/BUILD
Expand Up @@ -42,6 +42,7 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/types:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes/fake:go_default_library",
"//staging/src/k8s.io/client-go/testing:go_default_library",
Expand Down
3 changes: 2 additions & 1 deletion pkg/controller/nodelifecycle/scheduler/taint_manager.go
Expand Up @@ -358,7 +358,8 @@ func (tc *NoExecuteTaintManager) processPodOnNode(
minTolerationTime := getMinTolerationTime(usedTolerations)
// getMinTolerationTime returns negative value to denote infinite toleration.
if minTolerationTime < 0 {
klog.V(4).Infof("New tolerations for %v tolerate forever. Scheduled deletion won't be cancelled if already scheduled.", podNamespacedName.String())
klog.V(4).Infof("Current tolerations for %v tolerate forever, cancelling any scheduled deletion.", podNamespacedName.String())
tc.cancelWorkWithEvent(podNamespacedName)
return
}

Expand Down
87 changes: 83 additions & 4 deletions pkg/controller/nodelifecycle/scheduler/taint_manager_test.go
Expand Up @@ -27,6 +27,7 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes/fake"
"k8s.io/kubernetes/pkg/controller/testutil"

Expand Down Expand Up @@ -83,10 +84,20 @@ func (p *podHolder) setPod(pod *v1.Pod) {
}

type nodeHolder struct {
lock sync.Mutex

node *v1.Node
}

func (n *nodeHolder) setNode(node *v1.Node) {
n.lock.Lock()
defer n.lock.Unlock()
n.node = node
}

func (n *nodeHolder) getNode(name string) (*v1.Node, error) {
n.lock.Lock()
defer n.lock.Unlock()
return n.node, nil
}

Expand Down Expand Up @@ -362,7 +373,7 @@ func TestCreateNode(t *testing.T) {
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{item.node}).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.node}).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.NodeUpdated(nil, item.node)
Expand Down Expand Up @@ -483,7 +494,7 @@ func TestUpdateNode(t *testing.T) {
for _, item := range testCases {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.NodeUpdated(item.oldNode, item.newNode)
Expand All @@ -506,6 +517,74 @@ func TestUpdateNode(t *testing.T) {
}
}

func TestUpdateNodeWithMultipleTaints(t *testing.T) {
taint1 := createNoExecuteTaint(1)
taint2 := createNoExecuteTaint(2)

minute := int64(60)
pod := testutil.NewPod("pod1", "node1")
pod.Spec.Tolerations = []v1.Toleration{
{Key: taint1.Key, Operator: v1.TolerationOpExists, Effect: v1.TaintEffectNoExecute},
{Key: taint2.Key, Operator: v1.TolerationOpExists, Effect: v1.TaintEffectNoExecute, TolerationSeconds: &minute},
}
podNamespacedName := types.NamespacedName{Namespace: pod.Namespace, Name: pod.Name}

untaintedNode := testutil.NewNode("node1")

doubleTaintedNode := testutil.NewNode("node1")
doubleTaintedNode.Spec.Taints = []v1.Taint{taint1, taint2}

singleTaintedNode := testutil.NewNode("node1")
singleTaintedNode.Spec.Taints = []v1.Taint{taint1}

stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(pod)
holder := &nodeHolder{node: untaintedNode}
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (holder).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)

// no taint
holder.setNode(untaintedNode)
controller.handleNodeUpdate(nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
t.Fatalf("pod queued for deletion with no taints")
}

// no taint -> infinitely tolerated taint
holder.setNode(singleTaintedNode)
controller.handleNodeUpdate(nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
t.Fatalf("pod queued for deletion with permanently tolerated taint")
}

// infinitely tolerated taint -> temporarily tolerated taint
holder.setNode(doubleTaintedNode)
controller.handleNodeUpdate(nodeUpdateItem{"node1"})
// verify pod is queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) == nil {
t.Fatalf("pod not queued for deletion after addition of temporarily tolerated taint")
}

// temporarily tolerated taint -> infinitely tolerated taint
holder.setNode(singleTaintedNode)
controller.handleNodeUpdate(nodeUpdateItem{"node1"})
// verify pod is not queued for deletion
if controller.taintEvictionQueue.GetWorkerUnsafe(podNamespacedName.String()) != nil {
t.Fatalf("pod queued for deletion after removal of temporarily tolerated taint")
}

// verify pod is not deleted
for _, action := range fakeClientset.Actions() {
if action.GetVerb() == "delete" && action.GetResource().Resource == "pods" {
t.Error("Unexpected deletion")
}
}
close(stopCh)
}

func TestUpdateNodeWithMultiplePods(t *testing.T) {
testCases := []struct {
description string
Expand Down Expand Up @@ -549,7 +628,7 @@ func TestUpdateNodeWithMultiplePods(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
sort.Sort(item.expectedDeleteTimes)
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(fakeClientset, getPodFromClientset(fakeClientset), (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)
controller.NodeUpdated(item.oldNode, item.newNode)
Expand Down Expand Up @@ -749,7 +828,7 @@ func TestEventualConsistency(t *testing.T) {
stopCh := make(chan struct{})
fakeClientset := fake.NewSimpleClientset(&v1.PodList{Items: item.pods})
holder := &podHolder{}
controller := NewNoExecuteTaintManager(fakeClientset, holder.getPod, (&nodeHolder{item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller := NewNoExecuteTaintManager(fakeClientset, holder.getPod, (&nodeHolder{node: item.newNode}).getNode, getPodsAssignedToNode(fakeClientset))
controller.recorder = testutil.NewFakeRecorder()
go controller.Run(stopCh)

Expand Down