Merge pull request #50186 from k82cn/k8s_42001-4
Automatic merge from submit-queue

Task 2: Added toleration to DaemonSet pods for node condition taints

**What this PR does / why we need it**:
If the TaintNodesByCondition feature gate is enabled, this adds tolerations for the node condition taints to DaemonSet pods (see the sketch after the release note).
**Which issue this PR fixes** *(optional, in `fixes #<issue number>(, fixes #<issue_number>, ...)` format, will close that issue when PR gets merged)*: part of #42001 

**Release note**:
```release-note
None
```
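
In net effect, every DaemonSet pod ends up with tolerations equivalent to the following. This is a minimal sketch using only the API types and taint-key constants that appear in this diff; the package and function names are illustrative, not part of the PR:

```go
package sketch

import (
	"k8s.io/api/core/v1"
	"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
)

// nodeConditionTolerations lists the tolerations the DaemonSet controller
// now stamps onto pods. Illustrative helper, not part of this PR.
func nodeConditionTolerations(critical bool) []v1.Toleration {
	tolerations := []v1.Toleration{
		{Key: algorithm.TaintNodeDiskPressure, Operator: v1.TolerationOpExists, Effect: v1.TaintEffectNoSchedule},
		{Key: algorithm.TaintNodeMemoryPressure, Operator: v1.TolerationOpExists, Effect: v1.TaintEffectNoSchedule},
	}
	if critical {
		// Critical pods (with ExperimentalCriticalPodAnnotation enabled)
		// additionally tolerate the OutOfDisk taint. Note the PR's two call
		// sites differ on the effect: NoSchedule in the controller,
		// NoExecute in the pod template helper.
		tolerations = append(tolerations, v1.Toleration{
			Key: algorithm.TaintNodeOutOfDisk, Operator: v1.TolerationOpExists, Effect: v1.TaintEffectNoSchedule,
		})
	}
	return tolerations
}
```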
Kubernetes Submit Queue committed Aug 11, 2017
2 parents 68c4fd3 + 2ebd743 commit 6cbfac2
Showing 5 changed files with 129 additions and 2 deletions.
29 changes: 29 additions & 0 deletions pkg/controller/daemon/daemon_controller.go
@@ -1053,6 +1053,30 @@ func (dsc *DaemonSetsController) simulate(newPod *v1.Pod, node *v1.Node, ds *ext
Effect: v1.TaintEffectNoExecute,
})

// According to TaintNodesByCondition, all DaemonSet pods should tolerate the
// MemoryPressure and DiskPressure taints, and critical pods should additionally
// tolerate the OutOfDisk taint.
v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: algorithm.TaintNodeDiskPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: algorithm.TaintNodeMemoryPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCriticalPod(newPod) {
v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: algorithm.TaintNodeOutOfDisk,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
}

pods := []*v1.Pod{}

podList, err := dsc.podLister.List(labels.Everything())
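
AddOrUpdateTolerationInPod is an upsert, so re-running simulate does not accumulate duplicate tolerations. A sketch of that semantics, assuming this file's imports (an illustrative reimplementation matching on key only; the real v1helper matching rule may compare more fields, so this is not the actual source):

```go
// addOrUpdateToleration sketches the upsert semantics relied on above:
// replace a toleration with the same key, otherwise append.
func addOrUpdateToleration(spec *v1.PodSpec, t v1.Toleration) {
	for i := range spec.Tolerations {
		if spec.Tolerations[i].Key == t.Key {
			spec.Tolerations[i] = t
			return
		}
	}
	spec.Tolerations = append(spec.Tolerations, t)
}
```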
@@ -1225,6 +1249,11 @@ func Predicates(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo) (bool, []algorit
func NodeConditionPredicates(nodeInfo *schedulercache.NodeInfo) (bool, []algorithm.PredicateFailureReason) {
reasons := []algorithm.PredicateFailureReason{}

// If the TaintNodesByCondition feature is enabled, node conditions are surfaced
// as taints, so defer to the PodToleratesNodeTaints predicate instead.
if utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition) {
return true, nil
}

for _, c := range nodeInfo.Node().Status.Conditions {
// TODO: There are other node statuses that the DaemonSet should ideally respect too,
// e.g., MemoryPressure and DiskPressure
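
The early return above relies on the node controller representing these conditions as taints when the gate is on, roughly as follows. This is an illustrative sketch built only from types and constants appearing in this diff, not the node controller's actual code:

```go
// taintsForConditions sketches how "pressure" conditions are mirrored as
// NoSchedule taints under TaintNodesByCondition, which is why the
// condition loop above can be skipped.
func taintsForConditions(conds []v1.NodeCondition) []v1.Taint {
	var taints []v1.Taint
	for _, c := range conds {
		if c.Status != v1.ConditionTrue {
			continue
		}
		switch c.Type {
		case v1.NodeMemoryPressure:
			taints = append(taints, v1.Taint{Key: algorithm.TaintNodeMemoryPressure, Effect: v1.TaintEffectNoSchedule})
		case v1.NodeDiskPressure:
			taints = append(taints, v1.Taint{Key: algorithm.TaintNodeDiskPressure, Effect: v1.TaintEffectNoSchedule})
		}
	}
	return taints
}
```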
62 changes: 62 additions & 0 deletions pkg/controller/daemon/daemon_controller_test.go
@@ -1251,6 +1251,68 @@ func TestOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
}
}

// DaemonSet should launch a critical pod even when the node has the OutOfDisk taint.
func TestTaintOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
for _, strategy := range updateStrategies() {
ds := newDaemonSet("critical")
ds.Spec.UpdateStrategy = *strategy
setDaemonSetCritical(ds)
manager, podControl, _ := newTestController(ds)

node := newNode("not-enough-disk", nil)
node.Status.Conditions = []v1.NodeCondition{{Type: v1.NodeOutOfDisk, Status: v1.ConditionTrue}}
node.Spec.Taints = []v1.Taint{{Key: algorithm.TaintNodeOutOfDisk, Effect: v1.TaintEffectNoSchedule}}
manager.nodeStore.Add(node)

// NOTE: Tolerations for node condition taints are added to DaemonSet pods
// whether or not TaintNodesByCondition is enabled.

// Without the critical pod annotation feature gate enabled, the critical pod should not be created.
utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
manager.dsStore.Add(ds)
syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 0, 0)

// With the critical pod annotation feature gate enabled, the critical pod should be created.
utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
manager.dsStore.Add(ds)
syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 0)

// Roll the feature gates back to false.
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
}
}

// DaemonSet should launch a pod even when the node has MemoryPressure/DiskPressure taints.
func TestTaintPressureNodeDaemonLaunchesPod(t *testing.T) {
for _, strategy := range updateStrategies() {
ds := newDaemonSet("critical")
ds.Spec.UpdateStrategy = *strategy
setDaemonSetCritical(ds)
manager, podControl, _ := newTestController(ds)

node := newNode("resources-pressure", nil)
node.Status.Conditions = []v1.NodeCondition{
{Type: v1.NodeDiskPressure, Status: v1.ConditionTrue},
{Type: v1.NodeMemoryPressure, Status: v1.ConditionTrue},
}
node.Spec.Taints = []v1.Taint{
{Key: algorithm.TaintNodeDiskPressure, Effect: v1.TaintEffectNoSchedule},
{Key: algorithm.TaintNodeMemoryPressure, Effect: v1.TaintEffectNoSchedule},
}
manager.nodeStore.Add(node)

// Enabling the TaintNodesByCondition feature gate should allow the pod to be
// created despite the pressure taints.
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
manager.dsStore.Add(ds)
syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 0)

// Roll the feature gate back to false.
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
}
}
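
Because these tests mutate the process-global DefaultFeatureGate, they restore each gate by hand at the end. A defer-based wrapper keeps the gate restored even if an assertion fails partway through; this is a sketch, not an existing helper, and it assumes the gate's prior value was false:

```go
// withFeatureGate is an illustrative test helper (not part of this PR):
// it enables a gate for the duration of f and restores it afterwards,
// assuming the same imports as this test file.
func withFeatureGate(t *testing.T, gate string, f func()) {
	if err := utilfeature.DefaultFeatureGate.Set(gate + "=True"); err != nil {
		t.Fatalf("failed to enable %s: %v", gate, err)
	}
	defer utilfeature.DefaultFeatureGate.Set(gate + "=False")
	f()
}
```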

// DaemonSet should launch a critical pod even when the node has insufficient free resource.
func TestInsufficientCapacityNodeDaemonLaunchesCriticalPod(t *testing.T) {
for _, strategy := range updateStrategies() {
3 changes: 3 additions & 0 deletions pkg/controller/daemon/util/BUILD
@@ -15,11 +15,14 @@ go_library(
deps = [
"//pkg/api/v1/helper:go_default_library",
"//pkg/api/v1/pod:go_default_library",
"//pkg/features:go_default_library",
"//pkg/kubelet/types:go_default_library",
"//pkg/util/labels:go_default_library",
"//plugin/pkg/scheduler/algorithm:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//vendor/k8s.io/client-go/kubernetes/scheme:go_default_library",
],
)
27 changes: 27 additions & 0 deletions pkg/controller/daemon/util/daemonset_util.go
@@ -22,9 +22,12 @@ import (
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/kubernetes/scheme"
v1helper "k8s.io/kubernetes/pkg/api/v1/helper"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
"k8s.io/kubernetes/pkg/features"
kubelettypes "k8s.io/kubernetes/pkg/kubelet/types"
labelsutil "k8s.io/kubernetes/pkg/util/labels"
"k8s.io/kubernetes/plugin/pkg/scheduler/algorithm"
)
@@ -55,6 +58,30 @@ func CreatePodTemplate(template v1.PodTemplateSpec, generation int64, hash strin
Effect: v1.TaintEffectNoExecute,
})

// According to the TaintNodesByCondition feature, all DaemonSet pods should tolerate
// the MemoryPressure and DiskPressure taints, and critical pods should additionally
// tolerate the OutOfDisk taint.
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
Key: algorithm.TaintNodeDiskPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
Key: algorithm.TaintNodeMemoryPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCritical(newTemplate.Namespace, newTemplate.Annotations) {
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
Key: algorithm.TaintNodeOutOfDisk,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})
}

templateGenerationStr := fmt.Sprint(generation)
newTemplate.ObjectMeta.Labels = labelsutil.CloneAndAddLabel(
template.ObjectMeta.Labels,
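
Note that, unlike the controller-side copy in daemon_controller.go above (which uses TaintEffectNoSchedule), the OutOfDisk toleration here is added with TaintEffectNoExecute. For orientation, a call site of this helper looks roughly like the following sketch; the variable names and the hash argument are assumptions for illustration:

```go
// Illustrative use of CreatePodTemplate: pods created from the returned
// template carry the node-condition tolerations stamped in above.
tmpl := util.CreatePodTemplate(ds.Spec.Template, ds.Spec.TemplateGeneration, hash)
_ = tmpl.Spec.Tolerations // now includes the disk-pressure and memory-pressure tolerations
```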
10 changes: 8 additions & 2 deletions pkg/kubelet/types/pod_update.go
@@ -141,11 +141,17 @@ func (sp SyncPodType) String() string {
// key. Both the rescheduler and the kubelet use this key to make admission
// and scheduling decisions.
func IsCriticalPod(pod *v1.Pod) bool {
return IsCritical(pod.Namespace, pod.Annotations)
}

// IsCritical returns true if the given namespace and annotations bear the critical
// pod annotation key. The DaemonSetsController uses this directly to make scheduling decisions.
func IsCritical(ns string, annotations map[string]string) bool {
// Critical pods are restricted to "kube-system" namespace as of now.
if pod.Namespace != kubeapi.NamespaceSystem {
if ns != kubeapi.NamespaceSystem {
return false
}
val, ok := pod.Annotations[CriticalPodAnnotationKey]
val, ok := annotations[CriticalPodAnnotationKey]
if ok && val == "" {
return true
}
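
The split lets callers that hold only a namespace and an annotation map, rather than a full *v1.Pod, reuse the same check. This is exactly how CreatePodTemplate above consumes it (an illustrative fragment mirroring the call in daemonset_util.go):

```go
// Template-level criticality check: no *v1.Pod exists yet at this point,
// only the pod template's metadata.
if kubelettypes.IsCritical(newTemplate.Namespace, newTemplate.Annotations) {
	// treat pods created from this template as critical
}
```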
