Skip to content

Commit

Permalink
Merge pull request #4521 from jwtty/cp_nodeshutdown_1.26
Browse files Browse the repository at this point in the history
[release-1.26] add node out-of-service taint when node is shutdown
  • Loading branch information
k8s-ci-robot committed Aug 29, 2023
2 parents 62e2578 + 0cabd5b commit 7fe6b82
Show file tree
Hide file tree
Showing 2 changed files with 237 additions and 0 deletions.
53 changes: 53 additions & 0 deletions pkg/provider/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ import (
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/flowcontrol"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
cloudnodeutil "k8s.io/cloud-provider/node/helpers"
nodeutil "k8s.io/component-helpers/node/util"
"k8s.io/klog/v2"

azclients "sigs.k8s.io/cloud-provider-azure/pkg/azureclients"
Expand Down Expand Up @@ -76,6 +79,7 @@ import (
azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
"sigs.k8s.io/cloud-provider-azure/pkg/util/taints"

"sigs.k8s.io/yaml"
)
Expand All @@ -87,6 +91,14 @@ var (
defaultDisableOutboundSNAT = false
// RouteUpdateWaitingInSeconds is 30 seconds by default.
defaultRouteUpdateWaitingInSeconds = 30
nodeOutOfServiceTaint = &v1.Taint{
Key: v1.TaintNodeOutOfService,
Effect: v1.TaintEffectNoExecute,
}
nodeShutdownTaint = &v1.Taint{
Key: cloudproviderapi.TaintNodeShutdown,
Effect: v1.TaintEffectNoSchedule,
}
)

// Config holds the configuration parsed from the --cloud-config flag
Expand Down Expand Up @@ -1039,11 +1051,13 @@ func (az *Cloud) SetInformers(informerFactory informers.SharedInformerFactory) {
AddFunc: func(obj interface{}) {
node := obj.(*v1.Node)
az.updateNodeCaches(nil, node)
az.updateNodeTaint(node)
},
UpdateFunc: func(prev, obj interface{}) {
prevNode := prev.(*v1.Node)
newNode := obj.(*v1.Node)
az.updateNodeCaches(prevNode, newNode)
az.updateNodeTaint(newNode)
},
DeleteFunc: func(obj interface{}) {
node, isNode := obj.(*v1.Node)
Expand Down Expand Up @@ -1175,6 +1189,35 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
}
}

// updateNodeTaint updates node out-of-service taint
func (az *Cloud) updateNodeTaint(node *v1.Node) {
if node == nil {
klog.Warningf("node is nil, skip updating node out-of-service taint (should not happen)")
return
}
if az.KubeClient == nil {
klog.Warningf("az.KubeClient is nil, skip updating node out-of-service taint")
return
}

if isNodeReady(node) {
if err := cloudnodeutil.RemoveTaintOffNode(az.KubeClient, node.Name, node, nodeOutOfServiceTaint); err != nil {
klog.Errorf("failed to remove taint %s from the node %s", v1.TaintNodeOutOfService, node.Name)
}
} else {
// node shutdown taint is added when cloud provider determines instance is shutdown
if !taints.TaintExists(node.Spec.Taints, nodeOutOfServiceTaint) &&
taints.TaintExists(node.Spec.Taints, nodeShutdownTaint) {
klog.V(2).Infof("adding %s taint to node %s", v1.TaintNodeOutOfService, node.Name)
if err := cloudnodeutil.AddOrUpdateTaintOnNode(az.KubeClient, node.Name, nodeOutOfServiceTaint); err != nil {
klog.Errorf("failed to add taint %s to the node %s", v1.TaintNodeOutOfService, node.Name)
}
} else {
klog.V(2).Infof("node %s is not ready but node shutdown taint is not added, skip adding node out-of-service taint", node.Name)
}
}
}

// GetActiveZones returns all the zones in which k8s nodes are currently running.
func (az *Cloud) GetActiveZones() (sets.Set[string], error) {
if az.nodeInformerSynced == nil {
Expand Down Expand Up @@ -1296,3 +1339,13 @@ func (az *Cloud) ShouldNodeExcludedFromLoadBalancer(nodeName string) (bool, erro

return az.excludeLoadBalancerNodes.Has(nodeName), nil
}

func isNodeReady(node *v1.Node) bool {
if node == nil {
return false
}
if _, c := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady); c != nil {
return c.Status == v1.ConditionTrue
}
return false
}
184 changes: 184 additions & 0 deletions pkg/provider/azure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
servicehelpers "k8s.io/cloud-provider/service/helpers"
Expand All @@ -52,6 +53,7 @@ import (
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
providerconfig "sigs.k8s.io/cloud-provider-azure/pkg/provider/config"
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
"sigs.k8s.io/cloud-provider-azure/pkg/util/taints"
)

const (
Expand Down Expand Up @@ -3508,6 +3510,124 @@ func TestUpdateNodeCaches(t *testing.T) {
assert.Equal(t, 1, len(az.nodeNames))
}

func TestUpdateNodeTaint(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
az := GetTestCloud(ctrl)

tests := []struct {
desc string
node *v1.Node
hasTaint bool
}{
{
desc: "ready node without taint should not have taint",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
},
hasTaint: false,
},
{
desc: "ready node with taint should remove the taint",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{*nodeOutOfServiceTaint},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
},
hasTaint: false,
},
{
desc: "not-ready node without shutdown taint should not have out-of-service taint",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
},
hasTaint: false,
},
{
desc: "not-ready node with shutdown taint should have out-of-service taint",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{*nodeShutdownTaint},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
},
hasTaint: true,
},
{
desc: "not-ready node with out-of-service taint should keep it",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{*nodeOutOfServiceTaint},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
},
hasTaint: true,
},
}
for _, test := range tests {
cs := fake.NewSimpleClientset(test.node)
az.KubeClient = cs
az.updateNodeTaint(test.node)
newNode, _ := cs.CoreV1().Nodes().Get(context.Background(), "node", metav1.GetOptions{})
assert.Equal(t, test.hasTaint, taints.TaintExists(newNode.Spec.Taints, nodeOutOfServiceTaint), test.desc)
}
}

func TestUpdateNodeCacheExcludeLoadBalancer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down Expand Up @@ -3828,3 +3948,67 @@ func TestFindSecurityRule(t *testing.T) {
assert.Equal(t, testCases[i].expected, found, testCases[i].desc)
}
}

func TestIsNodeReady(t *testing.T) {
tests := []struct {
name string
node *v1.Node
expected bool
}{
{
node: nil,
expected: false,
},
{
node: &v1.Node{
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
},
expected: true,
},
{
node: &v1.Node{
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
},
expected: false,
},
{
node: &v1.Node{
Status: v1.NodeStatus{},
},
expected: false,
},
{
node: &v1.Node{
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionTrue,
},
},
},
},
expected: false,
},
}

for _, test := range tests {
if got := isNodeReady(test.node); got != test.expected {
assert.Equal(t, test.expected, got)
}
}
}

0 comments on commit 7fe6b82

Please sign in to comment.