Skip to content

Commit

Permalink
Do not count deleted nodes as upcoming nodes
Browse files Browse the repository at this point in the history
  • Loading branch information
ksmiley authored and bflanigan-sfdc committed Aug 30, 2023
1 parent 849890d commit be3da34
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 56 deletions.
2 changes: 1 addition & 1 deletion cluster-autoscaler/clusterstate/clusterstate.go
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ func (csr *ClusterStateRegistry) GetUpcomingNodes() map[string]int {
readiness := csr.perNodeGroupReadiness[id]
ar := csr.acceptableRanges[id]
// newNodes is the number of nodes that
newNodes := ar.CurrentTarget - (readiness.Ready + readiness.Unready + readiness.LongUnregistered)
newNodes := ar.CurrentTarget - (readiness.Ready + readiness.Unready + readiness.Deleted + readiness.LongUnregistered)
if newNodes <= 0 {
// Negative value is unlikely but theoretically possible.
continue
Expand Down
65 changes: 10 additions & 55 deletions cluster-autoscaler/clusterstate/clusterstate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,9 @@ import (

apiv1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
"k8s.io/autoscaler/cluster-autoscaler/clusterstate/api"
"k8s.io/autoscaler/cluster-autoscaler/clusterstate/utils"
"k8s.io/autoscaler/cluster-autoscaler/utils/deletetaint"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
"k8s.io/client-go/kubernetes/fake"
kube_record "k8s.io/client-go/tools/record"
Expand Down Expand Up @@ -497,21 +495,16 @@ func TestUpcomingNodes(t *testing.T) {
provider.AddNodeGroup("ng4", 1, 10, 1)
provider.AddNode("ng4", ng4_1)

// One node is already there, for a second nde deletion / draining was already started.
// Deleted node should not be counted as an upcoming node.
ng5_1 := BuildTestNode("ng5-1", 1000, 1000)
SetNodeReadyState(ng5_1, true, now.Add(-time.Minute))
ng5_2 := BuildTestNode("ng5-2", 1000, 1000)
SetNodeReadyState(ng5_2, true, now.Add(-time.Minute))
ng5_2.Spec.Taints = []apiv1.Taint{
{
Key: deletetaint.ToBeDeletedTaint,
Value: fmt.Sprint(time.Now().Unix()),
Effect: apiv1.TaintEffectNoSchedule,
},
}
provider.AddNodeGroup("ng5", 1, 10, 2)
ng5_1.Spec.Taints = append(ng5_1.Spec.Taints, apiv1.Taint{
Key: "ToBeDeletedByClusterAutoscaler",
Value: fmt.Sprint(time.Now().Unix()),
Effect: apiv1.TaintEffectNoSchedule,
})
SetNodeReadyState(ng5_1, false, now.Add(-time.Minute))
provider.AddNodeGroup("ng5", 1, 10, 1)
provider.AddNode("ng5", ng5_1)
provider.AddNode("ng5", ng5_2)

assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
Expand All @@ -520,7 +513,7 @@ func TestUpcomingNodes(t *testing.T) {
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, ng3_1, ng4_1, ng5_1, ng5_2}, nil, now)
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng2_1, ng3_1, ng4_1, ng5_1}, nil, now)
assert.NoError(t, err)
assert.Empty(t, clusterstate.GetScaleUpFailures())

Expand All @@ -529,45 +522,7 @@ func TestUpcomingNodes(t *testing.T) {
assert.Equal(t, 1, upcomingNodes["ng2"])
assert.Equal(t, 2, upcomingNodes["ng3"])
assert.NotContains(t, upcomingNodes, "ng4")
assert.Equal(t, 0, upcomingNodes["ng5"])
}

func TestTaintBasedNodeDeletion(t *testing.T) {
// Create a new Cloud Provider that does not implement the HasInstance check
// it will return the ErrNotImplemented error instead.
provider := testprovider.NewTestNodeDeletionDetectionCloudProvider(nil, nil,
func(string) (bool, error) { return false, cloudprovider.ErrNotImplemented })
now := time.Now()

// One node is already there, for a second nde deletion / draining was already started.
ng1_1 := BuildTestNode("ng1-1", 1000, 1000)
SetNodeReadyState(ng1_1, true, now.Add(-time.Minute))
ng1_2 := BuildTestNode("ng1-2", 1000, 1000)
SetNodeReadyState(ng1_2, true, now.Add(-time.Minute))
ng1_2.Spec.Taints = []apiv1.Taint{
{
Key: deletetaint.ToBeDeletedTaint,
Value: fmt.Sprint(time.Now().Unix()),
Effect: apiv1.TaintEffectNoSchedule,
},
}
provider.AddNodeGroup("ng1", 1, 10, 2)
provider.AddNode("ng1", ng1_1)
provider.AddNode("ng1", ng1_2)

assert.NotNil(t, provider)
fakeClient := &fake.Clientset{}
fakeLogRecorder, _ := utils.NewStatusMapRecorder(fakeClient, "kube-system", kube_record.NewFakeRecorder(5), false, "my-cool-configmap")
clusterstate := NewClusterStateRegistry(provider, ClusterStateRegistryConfig{
MaxTotalUnreadyPercentage: 10,
OkTotalUnreadyCount: 1,
}, fakeLogRecorder, newBackoff())
err := clusterstate.UpdateNodes([]*apiv1.Node{ng1_1, ng1_2}, nil, now)
assert.NoError(t, err)
assert.Empty(t, clusterstate.GetScaleUpFailures())

upcomingNodes := clusterstate.GetUpcomingNodes()
assert.Equal(t, 1, upcomingNodes["ng1"])
assert.NotContains(t, upcomingNodes, "ng5")
}

func TestIncorrectSize(t *testing.T) {
Expand Down

0 comments on commit be3da34

Please sign in to comment.