Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add node out-of-service taint when node is shutdown #4508

Merged
merged 1 commit into from
Aug 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
53 changes: 53 additions & 0 deletions pkg/provider/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ import (
"k8s.io/client-go/tools/record"
"k8s.io/client-go/util/flowcontrol"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
cloudnodeutil "k8s.io/cloud-provider/node/helpers"
nodeutil "k8s.io/component-helpers/node/util"
"k8s.io/klog/v2"

azclients "sigs.k8s.io/cloud-provider-azure/pkg/azureclients"
Expand Down Expand Up @@ -77,6 +80,7 @@ import (
azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
"sigs.k8s.io/cloud-provider-azure/pkg/util/taints"

"sigs.k8s.io/yaml"
)
Expand All @@ -88,6 +92,14 @@ var (
defaultDisableOutboundSNAT = false
// RouteUpdateWaitingInSeconds is 30 seconds by default.
defaultRouteUpdateWaitingInSeconds = 30
nodeOutOfServiceTaint = &v1.Taint{
Key: v1.TaintNodeOutOfService,
Effect: v1.TaintEffectNoExecute,
}
nodeShutdownTaint = &v1.Taint{
Key: cloudproviderapi.TaintNodeShutdown,
Effect: v1.TaintEffectNoSchedule,
}
)

// Config holds the configuration parsed from the --cloud-config flag
Expand Down Expand Up @@ -1174,11 +1186,13 @@ func (az *Cloud) SetInformers(informerFactory informers.SharedInformerFactory) {
AddFunc: func(obj interface{}) {
node := obj.(*v1.Node)
az.updateNodeCaches(nil, node)
az.updateNodeTaint(node)
},
UpdateFunc: func(prev, obj interface{}) {
prevNode := prev.(*v1.Node)
newNode := obj.(*v1.Node)
az.updateNodeCaches(prevNode, newNode)
az.updateNodeTaint(newNode)
},
DeleteFunc: func(obj interface{}) {
node, isNode := obj.(*v1.Node)
Expand Down Expand Up @@ -1318,6 +1332,35 @@ func (az *Cloud) updateNodeCaches(prevNode, newNode *v1.Node) {
}
}

// updateNodeTaint updates node out-of-service taint
func (az *Cloud) updateNodeTaint(node *v1.Node) {
if node == nil {
jwtty marked this conversation as resolved.
Show resolved Hide resolved
klog.Warningf("node is nil, skip updating node out-of-service taint (should not happen)")
return
jwtty marked this conversation as resolved.
Show resolved Hide resolved
}
jwtty marked this conversation as resolved.
Show resolved Hide resolved
if az.KubeClient == nil {
klog.Warningf("az.KubeClient is nil, skip updating node out-of-service taint")
return
}

if isNodeReady(node) {
if err := cloudnodeutil.RemoveTaintOffNode(az.KubeClient, node.Name, node, nodeOutOfServiceTaint); err != nil {
klog.Errorf("failed to remove taint %s from the node %s", v1.TaintNodeOutOfService, node.Name)
}
} else {
// node shutdown taint is added when cloud provider determines instance is shutdown
if !taints.TaintExists(node.Spec.Taints, nodeOutOfServiceTaint) &&
andyzhangx marked this conversation as resolved.
Show resolved Hide resolved
taints.TaintExists(node.Spec.Taints, nodeShutdownTaint) {
klog.V(2).Infof("adding %s taint to node %s", v1.TaintNodeOutOfService, node.Name)
if err := cloudnodeutil.AddOrUpdateTaintOnNode(az.KubeClient, node.Name, nodeOutOfServiceTaint); err != nil {
klog.Errorf("failed to add taint %s to the node %s", v1.TaintNodeOutOfService, node.Name)
}
} else {
klog.V(2).Infof("node %s is not ready but node shutdown taint is not added, skip adding node out-of-service taint", node.Name)
}
}
}

// GetActiveZones returns all the zones in which k8s nodes are currently running.
func (az *Cloud) GetActiveZones() (sets.Set[string], error) {
if az.nodeInformerSynced == nil {
Expand Down Expand Up @@ -1452,3 +1495,13 @@ func (az *Cloud) getActiveNodesByLoadBalancerName(lbName string) sets.Set[string

return sets.New[string]()
}

func isNodeReady(node *v1.Node) bool {
if node == nil {
return false
}
if _, c := nodeutil.GetNodeCondition(&node.Status, v1.NodeReady); c != nil {
return c.Status == v1.ConditionTrue
}
return false
}
184 changes: 184 additions & 0 deletions pkg/provider/azure_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import (
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
cloudprovider "k8s.io/cloud-provider"
cloudproviderapi "k8s.io/cloud-provider/api"
servicehelpers "k8s.io/cloud-provider/service/helpers"
Expand All @@ -54,6 +55,7 @@ import (
"sigs.k8s.io/cloud-provider-azure/pkg/consts"
providerconfig "sigs.k8s.io/cloud-provider-azure/pkg/provider/config"
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
"sigs.k8s.io/cloud-provider-azure/pkg/util/taints"
)

const (
Expand Down Expand Up @@ -3544,6 +3546,124 @@ func TestUpdateNodeCaches(t *testing.T) {
assert.Equal(t, 1, len(az.nodeNames))
}

func TestUpdateNodeTaint(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
az := GetTestCloud(ctrl)

tests := []struct {
desc string
node *v1.Node
hasTaint bool
}{
{
desc: "ready node without taint should not have taint",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
},
hasTaint: false,
},
{
desc: "ready node with taint should remove the taint",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{*nodeOutOfServiceTaint},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
},
hasTaint: false,
},
{
desc: "not-ready node without shutdown taint should not have out-of-service taint",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
},
hasTaint: false,
},
{
desc: "not-ready node with shutdown taint should have out-of-service taint",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{*nodeShutdownTaint},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
},
hasTaint: true,
},
{
desc: "not-ready node with out-of-service taint should keep it",
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: "node",
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{*nodeOutOfServiceTaint},
},
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
},
hasTaint: true,
},
}
for _, test := range tests {
cs := fake.NewSimpleClientset(test.node)
az.KubeClient = cs
az.updateNodeTaint(test.node)
newNode, _ := cs.CoreV1().Nodes().Get(context.Background(), "node", metav1.GetOptions{})
assert.Equal(t, test.hasTaint, taints.TaintExists(newNode.Spec.Taints, nodeOutOfServiceTaint), test.desc)
}
}

func TestUpdateNodeCacheExcludeLoadBalancer(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()
Expand Down Expand Up @@ -3966,3 +4086,67 @@ func TestCheckEnableMultipleStandardLoadBalancers(t *testing.T) {
err = az.checkEnableMultipleStandardLoadBalancers()
assert.Equal(t, "duplicated primary VMSet vmss-2 in multiple standard load balancer configurations lb2", err.Error())
}

func TestIsNodeReady(t *testing.T) {
tests := []struct {
name string
node *v1.Node
expected bool
}{
{
node: nil,
expected: false,
},
{
node: &v1.Node{
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionTrue,
},
},
},
},
expected: true,
},
{
node: &v1.Node{
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeReady,
Status: v1.ConditionFalse,
},
},
},
},
expected: false,
},
{
node: &v1.Node{
Status: v1.NodeStatus{},
},
expected: false,
},
{
node: &v1.Node{
Status: v1.NodeStatus{
Conditions: []v1.NodeCondition{
{
Type: v1.NodeMemoryPressure,
Status: v1.ConditionTrue,
},
},
},
},
expected: false,
},
}

for _, test := range tests {
if got := isNodeReady(test.node); got != test.expected {
assert.Equal(t, test.expected, got)
}
}
}