Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #66082: move feature gate checks inside IsCriticalPod #64954: Added unschedulable and network-unavailable toleration. #68494: Added default tolerations for new pods. #68601

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 0 additions & 1 deletion pkg/controller/daemon/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ go_library(
importpath = "k8s.io/kubernetes/pkg/controller/daemon",
deps = [
"//pkg/api/v1/pod:go_default_library",
"//pkg/apis/core/v1/helper:go_default_library",
"//pkg/controller:go_default_library",
"//pkg/controller/daemon/util:go_default_library",
"//pkg/features:go_default_library",
Expand Down
56 changes: 6 additions & 50 deletions pkg/controller/daemon/daemon_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ import (
"k8s.io/client-go/util/integer"
"k8s.io/client-go/util/workqueue"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
"k8s.io/kubernetes/pkg/controller"
"k8s.io/kubernetes/pkg/controller/daemon/util"
"k8s.io/kubernetes/pkg/features"
Expand Down Expand Up @@ -970,7 +969,7 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod
if err != nil {
generation = nil
}
template := util.CreatePodTemplate(ds.Spec.Template, generation, hash)
template := util.CreatePodTemplate(ds.Namespace, ds.Spec.Template, generation, hash)
// Batch the pod creates. Batch sizes start at SlowStartInitialBatchSize
// and double with each successful iteration in a kind of "slow start".
// This handles attempts to start large numbers of pods that would
Expand All @@ -997,7 +996,6 @@ func (dsc *DaemonSetsController) syncNodes(ds *apps.DaemonSet, podsToDelete, nod
// should be no conflicting node affinity with the target node.
podTemplate.Spec.Affinity = util.ReplaceDaemonSetPodNodeNameNodeAffinity(
podTemplate.Spec.Affinity, nodesNeedingDaemonPods[ix])
podTemplate.Spec.Tolerations = util.AppendNoScheduleTolerationIfNotExist(podTemplate.Spec.Tolerations)

err = dsc.podControl.CreatePodsWithControllerRef(ds.Namespace, podTemplate,
ds, metav1.NewControllerRef(ds, controllerKind))
Expand Down Expand Up @@ -1247,51 +1245,6 @@ func (dsc *DaemonSetsController) syncDaemonSet(key string) error {
}

func (dsc *DaemonSetsController) simulate(newPod *v1.Pod, node *v1.Node, ds *apps.DaemonSet) ([]algorithm.PredicateFailureReason, *schedulercache.NodeInfo, error) {
// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
// Add infinite toleration for taint notReady:NoExecute here
// to survive taint-based eviction enforced by NodeController
// when node turns not ready.
v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: algorithm.TaintNodeNotReady,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})

// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
// Add infinite toleration for taint unreachable:NoExecute here
// to survive taint-based eviction enforced by NodeController
// when node turns unreachable.
v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: algorithm.TaintNodeUnreachable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})

// According to TaintNodesByCondition, all DaemonSet pods should tolerate
// MemoryPressure and DiskPressure taints, and the critical pods should tolerate
// OutOfDisk taint additional.
v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: algorithm.TaintNodeDiskPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: algorithm.TaintNodeMemoryPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

// TODO(#48843) OutOfDisk taints will be removed in 1.10
if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCriticalPod(newPod) {
v1helper.AddOrUpdateTolerationInPod(newPod, &v1.Toleration{
Key: algorithm.TaintNodeOutOfDisk,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
}

objects, err := dsc.podNodeIndex.ByIndex("nodeName", node.Name)
if err != nil {
return nil, nil, err
Expand Down Expand Up @@ -1421,6 +1374,10 @@ func NewPod(ds *apps.DaemonSet, nodeName string) *v1.Pod {
newPod := &v1.Pod{Spec: ds.Spec.Template.Spec, ObjectMeta: ds.Spec.Template.ObjectMeta}
newPod.Namespace = ds.Namespace
newPod.Spec.NodeName = nodeName

// Added default tolerations for DaemonSet pods.
util.AddOrUpdateDaemonPodTolerations(&newPod.Spec, kubelettypes.IsCriticalPod(newPod))

return newPod
}

Expand Down Expand Up @@ -1466,8 +1423,7 @@ func Predicates(pod *v1.Pod, nodeInfo *schedulercache.NodeInfo) (bool, []algorit
return len(predicateFails) == 0, predicateFails, nil
}

critical := utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCriticalPod(pod)
critical := kubelettypes.IsCriticalPod(pod)

fit, reasons, err := predicates.PodToleratesNodeTaints(pod, nil, nodeInfo)
if err != nil {
Expand Down
54 changes: 34 additions & 20 deletions pkg/controller/daemon/daemon_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,12 @@ func markPodReady(pod *v1.Pod) {
podutil.UpdatePodCondition(&pod.Status, &condition)
}

// setFeatureGate sets the given feature gate on utilfeature.DefaultFeatureGate
// to the requested state and aborts the calling test with t.Fatalf if the gate
// cannot be set.
//
// It is marked as a test helper so that a failure is attributed to the line of
// the calling test rather than to this function.
func setFeatureGate(t *testing.T, feature utilfeature.Feature, enabled bool) {
	t.Helper()
	if err := utilfeature.DefaultFeatureGate.Set(fmt.Sprintf("%s=%t", feature, enabled)); err != nil {
		t.Fatalf("Failed to set FeatureGate %v to %t: %v", feature, enabled, err)
	}
}

// DaemonSets without node selectors should launch pods on every node.
func TestSimpleDaemonSetLaunchesPods(t *testing.T) {
for _, strategy := range updateStrategies() {
Expand All @@ -458,12 +464,9 @@ func TestSimpleDaemonSetScheduleDaemonSetPodsLaunchesPods(t *testing.T) {
enabled := utilfeature.DefaultFeatureGate.Enabled(features.ScheduleDaemonSetPods)
// Rollback feature gate.
defer func() {
if !enabled {
utilfeature.DefaultFeatureGate.Set("ScheduleDaemonSetPods=false")
}
setFeatureGate(t, features.ScheduleDaemonSetPods, enabled)
}()

utilfeature.DefaultFeatureGate.Set("ScheduleDaemonSetPods=true")
setFeatureGate(t, features.ScheduleDaemonSetPods, true)

nodeNum := 5

Expand Down Expand Up @@ -1508,6 +1511,11 @@ func setDaemonSetToleration(ds *apps.DaemonSet, tolerations []v1.Toleration) {
// DaemonSet should launch a critical pod even when the node has OutOfDisk taints.
// TODO(#48843) OutOfDisk taints will be removed in 1.10
func TestTaintOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
enabled := utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation)
defer func() {
setFeatureGate(t, features.ExperimentalCriticalPodAnnotation, enabled)
}()

for _, strategy := range updateStrategies() {
ds := newDaemonSet("critical")
ds.Spec.UpdateStrategy = *strategy
Expand All @@ -1525,25 +1533,24 @@ func TestTaintOutOfDiskNodeDaemonLaunchesCriticalPod(t *testing.T) {
// NOTE: Whether or not TaintNodesByCondition is enabled, it'll add toleration to DaemonSet pods.

// Without enabling critical pod annotation feature gate, we shouldn't create critical pod
utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
setFeatureGate(t, features.ExperimentalCriticalPodAnnotation, false)
manager.dsStore.Add(ds)
syncAndValidateDaemonSets(t, manager, ds, podControl, 0, 0, 0)

// With enabling critical pod annotation feature gate, we will create critical pod
utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
setFeatureGate(t, features.ExperimentalCriticalPodAnnotation, true)
manager.dsStore.Add(ds)
syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 0)

// Rollback feature gate to false.
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
}
}

// DaemonSet should launch a pod even when the node has MemoryPressure/DiskPressure taints.
func TestTaintPressureNodeDaemonLaunchesPod(t *testing.T) {
enabled := utilfeature.DefaultFeatureGate.Enabled(features.TaintNodesByCondition)
defer func() {
setFeatureGate(t, features.TaintNodesByCondition, enabled)
}()

for _, strategy := range updateStrategies() {
ds := newDaemonSet("critical")
ds.Spec.UpdateStrategy = *strategy
Expand All @@ -1565,17 +1572,19 @@ func TestTaintPressureNodeDaemonLaunchesPod(t *testing.T) {
manager.nodeStore.Add(node)

// Enabling critical pod and taint nodes by condition feature gate should create critical pod
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=True")
setFeatureGate(t, features.TaintNodesByCondition, true)
manager.dsStore.Add(ds)
syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 0)

// Rollback feature gate to false.
utilfeature.DefaultFeatureGate.Set("TaintNodesByCondition=False")
}
}

// DaemonSet should launch a critical pod even when the node has insufficient free resource.
func TestInsufficientCapacityNodeDaemonLaunchesCriticalPod(t *testing.T) {
enabled := utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation)
defer func() {
setFeatureGate(t, features.ExperimentalCriticalPodAnnotation, enabled)
}()

for _, strategy := range updateStrategies() {
podSpec := resourcePodSpec("too-much-mem", "75M", "75m")
ds := newDaemonSet("critical")
Expand All @@ -1595,7 +1604,7 @@ func TestInsufficientCapacityNodeDaemonLaunchesCriticalPod(t *testing.T) {
})

// Without enabling critical pod annotation feature gate, we shouldn't create critical pod
utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=False")
setFeatureGate(t, features.ExperimentalCriticalPodAnnotation, false)
manager.dsStore.Add(ds)
switch strategy.Type {
case apps.OnDeleteDaemonSetStrategyType:
Expand All @@ -1607,7 +1616,7 @@ func TestInsufficientCapacityNodeDaemonLaunchesCriticalPod(t *testing.T) {
}

// Enabling critical pod annotation feature gate should create critical pod
utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
setFeatureGate(t, features.ExperimentalCriticalPodAnnotation, true)
switch strategy.Type {
case apps.OnDeleteDaemonSetStrategyType:
syncAndValidateDaemonSets(t, manager, ds, podControl, 1, 0, 2)
Expand All @@ -1621,6 +1630,11 @@ func TestInsufficientCapacityNodeDaemonLaunchesCriticalPod(t *testing.T) {

// DaemonSets should NOT launch a critical pod when there are port conflicts.
func TestPortConflictNodeDaemonDoesNotLaunchCriticalPod(t *testing.T) {
enabled := utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation)
defer func() {
setFeatureGate(t, features.ExperimentalCriticalPodAnnotation, enabled)
}()

for _, strategy := range updateStrategies() {
podSpec := v1.PodSpec{
NodeName: "port-conflict",
Expand All @@ -1640,7 +1654,7 @@ func TestPortConflictNodeDaemonDoesNotLaunchCriticalPod(t *testing.T) {
Spec: podSpec,
})

utilfeature.DefaultFeatureGate.Set("ExperimentalCriticalPodAnnotation=True")
setFeatureGate(t, features.ExperimentalCriticalPodAnnotation, true)
ds := newDaemonSet("critical")
ds.Spec.UpdateStrategy = *strategy
ds.Spec.Template.Spec = podSpec
Expand Down
1 change: 0 additions & 1 deletion pkg/controller/daemon/util/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ go_library(
"//vendor/k8s.io/api/apps/v1:go_default_library",
"//vendor/k8s.io/api/core/v1:go_default_library",
"//vendor/k8s.io/api/extensions/v1beta1:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/api/equality:go_default_library",
"//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//vendor/k8s.io/apiserver/pkg/util/feature:go_default_library",
],
Expand Down
82 changes: 42 additions & 40 deletions pkg/controller/daemon/util/daemonset_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
apps "k8s.io/api/apps/v1"
"k8s.io/api/core/v1"
extensions "k8s.io/api/extensions/v1beta1"
apiequality "k8s.io/apimachinery/pkg/api/equality"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilfeature "k8s.io/apiserver/pkg/util/feature"
podutil "k8s.io/kubernetes/pkg/api/v1/pod"
Expand All @@ -49,16 +48,13 @@ func GetTemplateGeneration(ds *apps.DaemonSet) (*int64, error) {
return &generation, nil
}

// CreatePodTemplate returns copy of provided template with additional
// label which contains templateGeneration (for backward compatibility),
// hash of provided template and sets default daemon tolerations.
func CreatePodTemplate(template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
newTemplate := *template.DeepCopy()
// AddOrUpdateDaemonPodTolerations applies necessary tolerations to DaemonSet Pods, e.g. node.kubernetes.io/not-ready:NoExecute.
func AddOrUpdateDaemonPodTolerations(spec *v1.PodSpec, isCritical bool) {
// DaemonSet pods shouldn't be deleted by NodeController in case of node problems.
// Add infinite toleration for taint notReady:NoExecute here
// to survive taint-based eviction enforced by NodeController
// when node turns not ready.
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeNotReady,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
Expand All @@ -68,36 +64,67 @@ func CreatePodTemplate(template v1.PodTemplateSpec, generation *int64, hash stri
// Add infinite toleration for taint unreachable:NoExecute here
// to survive taint-based eviction enforced by NodeController
// when node turns unreachable.
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeUnreachable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})

// According to TaintNodesByCondition feature, all DaemonSet pods should tolerate
// MemoryPressure and DiskPressure taints, and the critical pods should tolerate
// OutOfDisk taint.
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
// MemoryPressure, DiskPressure, Unschedulable and NetworkUnavailable taints,
// and the critical pods should tolerate OutOfDisk taint.
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeDiskPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeMemoryPressure,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeUnschedulable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})

if spec.HostNetwork {
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeNetworkUnavailable,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
}

// TODO(#48843) OutOfDisk taints will be removed in 1.10
if utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCritical(newTemplate.Namespace, newTemplate.Annotations) {
v1helper.AddOrUpdateTolerationInPodSpec(&newTemplate.Spec, &v1.Toleration{
if isCritical {
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeOutOfDisk,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoExecute,
})
v1helper.AddOrUpdateTolerationInPodSpec(spec, &v1.Toleration{
Key: algorithm.TaintNodeOutOfDisk,
Operator: v1.TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
})
}
}

// CreatePodTemplate returns copy of provided template with additional
// label which contains templateGeneration (for backward compatibility),
// hash of provided template and sets default daemon tolerations.
func CreatePodTemplate(ns string, template v1.PodTemplateSpec, generation *int64, hash string) v1.PodTemplateSpec {
newTemplate := *template.DeepCopy()

// TODO(k82cn): when removing CriticalPod feature, also remove 'ns' parameter.
isCritical := utilfeature.DefaultFeatureGate.Enabled(features.ExperimentalCriticalPodAnnotation) &&
kubelettypes.IsCritical(ns, newTemplate.Annotations)

AddOrUpdateDaemonPodTolerations(&newTemplate.Spec, isCritical)

if newTemplate.ObjectMeta.Labels == nil {
newTemplate.ObjectMeta.Labels = make(map[string]string)
Expand Down Expand Up @@ -185,31 +212,6 @@ func ReplaceDaemonSetPodNodeNameNodeAffinity(affinity *v1.Affinity, nodename str
return affinity
}

// AppendNoScheduleTolerationIfNotExist returns tolerations with a toleration for
// the algorithm.TaintNodeUnschedulable taint (operator Exists, effect NoSchedule)
// appended, unless an entry semantically equal to it is already present, in
// which case the slice is returned as it was passed in.
func AppendNoScheduleTolerationIfNotExist(tolerations []v1.Toleration) []v1.Toleration {
	wanted := v1.Toleration{
		Key:      algorithm.TaintNodeUnschedulable,
		Operator: v1.TolerationOpExists,
		Effect:   v1.TaintEffectNoSchedule,
	}

	for i := range tolerations {
		if apiequality.Semantic.DeepEqual(tolerations[i], wanted) {
			// Already tolerated; nothing to add.
			return tolerations
		}
	}

	return append(tolerations, wanted)
}

// GetTargetNodeName gets the target node name of DaemonSet pods. If `.spec.NodeName` is not empty,
// return `.spec.NodeName`; otherwise, retrieve node name of pending pods from NodeAffinity. Return error
// if failed to retrieve node name from `.spec.NodeName` and NodeAffinity.
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/daemon/util/daemonset_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ func TestCreatePodTemplate(t *testing.T) {
}
for _, test := range tests {
podTemplateSpec := v1.PodTemplateSpec{}
newPodTemplate := CreatePodTemplate(podTemplateSpec, test.templateGeneration, test.hash)
newPodTemplate := CreatePodTemplate("", podTemplateSpec, test.templateGeneration, test.hash)
val, exists := newPodTemplate.ObjectMeta.Labels[extensions.DaemonSetTemplateGenerationKey]
if !exists || val != fmt.Sprint(*test.templateGeneration) {
t.Errorf("Expected podTemplateSpec to have generation label value: %d, got: %s", *test.templateGeneration, val)
Expand Down