Move TaintBasedEvictions feature gates to GA #87487

Merged
merged 1 commit on Mar 9, 2020
1 change: 0 additions & 1 deletion cmd/kube-controller-manager/app/core.go
@@ -207,7 +207,6 @@ func startNodeLifecycleController(ctx ControllerContext) (http.Handler, bool, er
ctx.ComponentConfig.NodeLifecycleController.LargeClusterSizeThreshold,
ctx.ComponentConfig.NodeLifecycleController.UnhealthyZoneThreshold,
ctx.ComponentConfig.NodeLifecycleController.EnableTaintManager,
utilfeature.DefaultFeatureGate.Enabled(features.TaintBasedEvictions),
)
if err != nil {
return nil, true, err
33 changes: 11 additions & 22 deletions pkg/controller/nodelifecycle/node_lifecycle_controller.go
@@ -351,10 +351,6 @@ type Controller struct {
// tainted nodes, if they're not tolerated.
runTaintManager bool

// if set to true Controller will taint Nodes with 'TaintNodeNotReady' and 'TaintNodeUnreachable'
// taints instead of evicting Pods itself.
useTaintBasedEvictions bool

nodeUpdateQueue workqueue.Interface
podUpdateQueue workqueue.RateLimitingInterface
}
@@ -375,7 +371,6 @@ func NewNodeLifecycleController(
largeClusterThreshold int32,
unhealthyZoneThreshold float32,
runTaintManager bool,
useTaintBasedEvictions bool,
Review comment (Member):
Can you also remove its definition in type Controller?

) (*Controller, error) {

if kubeClient == nil {
@@ -416,13 +411,9 @@ func NewNodeLifecycleController(
largeClusterThreshold: largeClusterThreshold,
unhealthyZoneThreshold: unhealthyZoneThreshold,
runTaintManager: runTaintManager,
useTaintBasedEvictions: useTaintBasedEvictions && runTaintManager,
nodeUpdateQueue: workqueue.NewNamed("node_lifecycle_controller"),
podUpdateQueue: workqueue.NewNamedRateLimitingQueue(workqueue.DefaultControllerRateLimiter(), "node_lifecycle_controller_pods"),
}
if useTaintBasedEvictions {
klog.Infof("Controller is using taint based evictions.")
}

nc.enterPartialDisruptionFunc = nc.ReducedQPSFunc
nc.enterFullDisruptionFunc = nc.HealthyQPSFunc
@@ -580,7 +571,7 @@ func (nc *Controller) Run(stopCh <-chan struct{}) {
go wait.Until(nc.doPodProcessingWorker, time.Second, stopCh)
}

if nc.useTaintBasedEvictions {
if nc.runTaintManager {
// Handling taint based evictions. Because we don't want a dedicated logic in TaintManager for NC-originated
// taints and we normally don't rate limit evictions caused by taints, we need to rate limit adding taints.
go wait.Until(nc.doNoExecuteTaintingPass, scheduler.NodeEvictionPeriod, stopCh)
@@ -768,9 +759,7 @@ func (nc *Controller) doEvictionPass() {

// monitorNodeHealth verifies node health are constantly updated by kubelet, and
// if not, post "NodeReady==ConditionUnknown".
// For nodes who are not ready or not reachable for a long period of time.
// This function will taint them if TaintBasedEvictions feature was enabled.
// Otherwise, it would evict it directly.
// This function will taint nodes who are not ready or not reachable for a long period of time.
func (nc *Controller) monitorNodeHealth() error {
// We are listing nodes from local cache as we can tolerate some small delays
// comparing to state from etcd and there is eventual consistency anyway.
@@ -789,7 +778,7 @@ func (nc *Controller) monitorNodeHealth() error {
nodeutil.RecordNodeEvent(nc.recorder, added[i].Name, string(added[i].UID), v1.EventTypeNormal, "RegisteredNode", fmt.Sprintf("Registered Node %v in Controller", added[i].Name))
nc.knownNodeSet[added[i].Name] = added[i]
nc.addPodEvictorForNewZone(added[i])
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.markNodeAsReachable(added[i])
} else {
nc.cancelPodEviction(added[i])
@@ -843,7 +832,7 @@ func (nc *Controller) monitorNodeHealth() error {
}
continue
}
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.processTaintBaseEviction(node, &observedReadyCondition)
} else {
if err := nc.processNoTaintBaseEviction(node, &observedReadyCondition, gracePeriod, pods); err != nil {
@@ -1209,7 +1198,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
if allAreFullyDisrupted {
klog.V(0).Info("Controller detected that all Nodes are not-Ready. Entering master disruption mode.")
for i := range nodes {
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
_, err := nc.markNodeAsReachable(nodes[i])
if err != nil {
klog.Errorf("Failed to remove taints from Node %v", nodes[i].Name)
@@ -1220,7 +1209,7 @@ func (nc *Controller) handleDisruption(zoneToNodeConditions map[string][]*v1.Nod
}
// We stop all evictions.
for k := range nc.zoneStates {
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[k].SwapLimiter(0)
} else {
nc.zonePodEvictor[k].SwapLimiter(0)
@@ -1332,7 +1321,7 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
pods := []*v1.Pod{pod}
// In taint-based eviction mode, only node updates are processed by NodeLifecycleController.
// Pods are processed by TaintManager.
if !nc.useTaintBasedEvictions {
if !nc.runTaintManager {
if err := nc.processNoTaintBaseEviction(node, currentReadyCondition, nc.nodeMonitorGracePeriod, pods); err != nil {
klog.Warningf("Unable to process pod %+v eviction from node %v: %v.", podItem, nodeName, err)
nc.podUpdateQueue.AddRateLimited(podItem)
@@ -1351,21 +1340,21 @@ func (nc *Controller) processPod(podItem podUpdateItem) {
func (nc *Controller) setLimiterInZone(zone string, zoneSize int, state ZoneState) {
switch state {
case stateNormal:
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(nc.evictionLimiterQPS)
} else {
nc.zonePodEvictor[zone].SwapLimiter(nc.evictionLimiterQPS)
}
case statePartialDisruption:
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize))
} else {
nc.zonePodEvictor[zone].SwapLimiter(
nc.enterPartialDisruptionFunc(zoneSize))
}
case stateFullDisruption:
if nc.useTaintBasedEvictions {
if nc.runTaintManager {
nc.zoneNoExecuteTainter[zone].SwapLimiter(
nc.enterFullDisruptionFunc(zoneSize))
} else {
@@ -1431,7 +1420,7 @@ func (nc *Controller) addPodEvictorForNewZone(node *v1.Node) {
zone := utilnode.GetZoneKey(node)
if _, found := nc.zoneStates[zone]; !found {
nc.zoneStates[zone] = stateInitial
if !nc.useTaintBasedEvictions {
if !nc.runTaintManager {
nc.zonePodEvictor[zone] =
scheduler.NewRateLimitedTimedQueue(
flowcontrol.NewTokenBucketRateLimiter(nc.evictionLimiterQPS, scheduler.EvictionRateLimiterBurst))
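With the gate removed, the eviction path above is selected solely by runTaintManager, which kube-controller-manager wires from NodeLifecycleController.EnableTaintManager (see the core.go change at the top). A rough sketch of that dispatch, not the controller's literal code:

package main

import "fmt"

// evictionMode mirrors the single remaining switch: taint-based eviction when
// the taint manager runs, direct pod eviction otherwise.
func evictionMode(runTaintManager bool) string {
	if runTaintManager {
		// NodeLifecycleController applies node.kubernetes.io/not-ready or
		// node.kubernetes.io/unreachable NoExecute taints; TaintManager then
		// evicts pods that do not tolerate them.
		return "taint-based"
	}
	// Legacy path: the controller rate-limits and evicts pods itself.
	return "direct"
}

func main() {
	fmt.Println(evictionMode(true))
}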
@@ -180,7 +180,6 @@ func newNodeLifecycleControllerFromClient(
largeClusterThreshold,
unhealthyZoneThreshold,
useTaints,
useTaints,
)
if err != nil {
return nil, err
3 changes: 2 additions & 1 deletion pkg/features/kube_features.go
@@ -61,6 +61,7 @@ const (

// owner: @Huang-Wei
// beta: v1.13
// ga: v1.18
//
// Changes the logic behind evicting Pods from not ready Nodes
// to take advantage of NoExecute Taints and Tolerations.
@@ -592,7 +593,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureS
DynamicKubeletConfig: {Default: true, PreRelease: featuregate.Beta},
ExperimentalHostUserNamespaceDefaultingGate: {Default: false, PreRelease: featuregate.Beta},
DevicePlugins: {Default: true, PreRelease: featuregate.Beta},
TaintBasedEvictions: {Default: true, PreRelease: featuregate.Beta},
TaintBasedEvictions: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.19
RotateKubeletServerCertificate: {Default: true, PreRelease: featuregate.Beta},
RotateKubeletClientCertificate: {Default: true, PreRelease: featuregate.Beta},
LocalStorageCapacityIsolation: {Default: true, PreRelease: featuregate.Beta},
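For context on LockToDefault: a gate graduated with LockToDefault: true always reports its default and rejects --feature-gates overrides at startup. A minimal sketch of that behavior against k8s.io/component-base/featuregate (not part of this diff; assumes the standard Add/Set/Enabled API):

package main

import (
	"fmt"

	"k8s.io/component-base/featuregate"
)

func main() {
	gate := featuregate.NewFeatureGate()
	// Register the gate as defined after this change (GA, locked to its default).
	_ = gate.Add(map[featuregate.Feature]featuregate.FeatureSpec{
		"TaintBasedEvictions": {Default: true, PreRelease: featuregate.GA, LockToDefault: true},
	})

	fmt.Println(gate.Enabled("TaintBasedEvictions")) // always true

	// Overriding a locked gate is rejected rather than silently applied.
	if err := gate.Set("TaintBasedEvictions=false"); err != nil {
		fmt.Println("override rejected:", err)
	}
}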
7 changes: 0 additions & 7 deletions staging/src/k8s.io/api/core/v1/well_known_taints.go
@@ -18,38 +18,31 @@ package v1

const (
// TaintNodeNotReady will be added when node is not ready
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes ready.
TaintNodeNotReady = "node.kubernetes.io/not-ready"

// TaintNodeUnreachable will be added when node becomes unreachable
// (corresponding to NodeReady status ConditionUnknown)
// and feature-gate for TaintBasedEvictions flag is enabled,
// and removed when node becomes reachable (NodeReady status ConditionTrue).
TaintNodeUnreachable = "node.kubernetes.io/unreachable"

// TaintNodeUnschedulable will be added when node becomes unschedulable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node becomes scheduable.
TaintNodeUnschedulable = "node.kubernetes.io/unschedulable"

// TaintNodeMemoryPressure will be added when node has memory pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough memory.
TaintNodeMemoryPressure = "node.kubernetes.io/memory-pressure"

// TaintNodeDiskPressure will be added when node has disk pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodeDiskPressure = "node.kubernetes.io/disk-pressure"

// TaintNodeNetworkUnavailable will be added when node's network is unavailable
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when network becomes ready.
TaintNodeNetworkUnavailable = "node.kubernetes.io/network-unavailable"

// TaintNodePIDPressure will be added when node has pid pressure
// and feature-gate for TaintNodesByCondition flag is enabled,
// and removed when node has enough disk.
TaintNodePIDPressure = "node.kubernetes.io/pid-pressure"
)
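These NoExecute taints only evict pods that do not tolerate them; a pod can ride out a not-ready node for as long as its toleration allows. A hedged example of such a toleration built from the constant above (the 300-second value mirrors what the DefaultTolerationSeconds admission plugin typically injects; treat it as an assumption here):

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

func main() {
	// Assumed default: five minutes before the TaintManager evicts the pod.
	seconds := int64(300)

	// Tolerates node.kubernetes.io/not-ready:NoExecute for a bounded time.
	toleration := v1.Toleration{
		Key:               v1.TaintNodeNotReady,
		Operator:          v1.TolerationOpExists,
		Effect:            v1.TaintEffectNoExecute,
		TolerationSeconds: &seconds,
	}
	fmt.Printf("%+v\n", toleration)
}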
3 changes: 0 additions & 3 deletions test/integration/node/BUILD
@@ -15,7 +15,6 @@ go_test(
tags = ["integration"],
deps = [
"//pkg/controller/nodelifecycle:go_default_library",
"//pkg/features:go_default_library",
"//plugin/pkg/admission/defaulttolerationseconds:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction:go_default_library",
"//plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction:go_default_library",
@@ -25,11 +24,9 @@ go_test(
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/runtime/schema:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/admission:go_default_library",
"//staging/src/k8s.io/apiserver/pkg/util/feature:go_default_library",
"//staging/src/k8s.io/client-go/informers:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/client-go/rest:go_default_library",
"//staging/src/k8s.io/component-base/featuregate/testing:go_default_library",
"//test/e2e/framework/pod:go_default_library",
"//test/integration/framework:go_default_library",
"//test/integration/util:go_default_library",
7 changes: 0 additions & 7 deletions test/integration/node/lifecycle_test.go
@@ -28,13 +28,10 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apiserver/pkg/admission"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
restclient "k8s.io/client-go/rest"
featuregatetesting "k8s.io/component-base/featuregate/testing"
"k8s.io/kubernetes/pkg/controller/nodelifecycle"
"k8s.io/kubernetes/pkg/features"
"k8s.io/kubernetes/plugin/pkg/admission/defaulttolerationseconds"
"k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction"
pluginapi "k8s.io/kubernetes/plugin/pkg/admission/podtolerationrestriction/apis/podtolerationrestriction"
@@ -109,9 +106,6 @@ func TestTaintBasedEvictions(t *testing.T) {
},
}

// Enable TaintBasedEvictions
defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features.TaintBasedEvictions, true)()

// Build admission chain handler.
podTolerations := podtolerationrestriction.NewPodTolerationsPlugin(&pluginapi.Configuration{})
admission := admission.NewChainHandler(
@@ -156,7 +150,6 @@
50, // Large cluster threshold
0.55, // Unhealthy zone threshold
true, // Run taint manager
true, // Use taint based evictions
)
if err != nil {
t.Errorf("Failed to create node controller: %v", err)
1 change: 0 additions & 1 deletion test/integration/scheduler/taint_test.go
@@ -98,7 +98,6 @@ func TestTaintNodeByCondition(t *testing.T) {
100, // Large cluster threshold
100, // Unhealthy zone threshold
true, // Run taint manager
true, // Use taint based evictions
)
if err != nil {
t.Errorf("Failed to create node controller: %v", err)