Skip to content

Commit

Permalink
Add support for passing in custom ignore labels
Browse files Browse the repository at this point in the history
  • Loading branch information
adammw committed Mar 17, 2020
1 parent 5476125 commit 8313e96
Show file tree
Hide file tree
Showing 12 changed files with 123 additions and 51 deletions.
3 changes: 3 additions & 0 deletions cluster-autoscaler/config/autoscaling_options.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ type AutoscalingOptions struct {
MaxBulkSoftTaintTime time.Duration
// IgnoredTaints is a list of taints to ignore when considering a node template for scheduling.
IgnoredTaints []string
// BalancingExtraIgnoredLabels is a list of labels to additionally ignore when comparing if two node groups are similar.
// Labels in BasicIgnoredLabels and the cloud provider-specific ignored labels are always ignored.
BalancingExtraIgnoredLabels []string
// AWSUseStaticInstanceList tells whether the AWS cloud provider uses a static instance type list or dynamically fetches instance types from remote APIs.
AWSUseStaticInstanceList bool
// Path to kube configuration if available
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/core/scale_test_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func NewTestProcessors() *processors.AutoscalingProcessors {
return &processors.AutoscalingProcessors{
PodListProcessor: NewFilterOutSchedulablePodListProcessor(),
NodeGroupListProcessor: &nodegroups.NoOpNodeGroupListProcessor{},
NodeGroupSetProcessor: &nodegroupset.BalancingNodeGroupSetProcessor{},
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}),
// TODO(bskiba): change scale up test so that this can be a NoOpProcessor
ScaleUpStatusProcessor: &status.EventingScaleUpStatusProcessor{},
ScaleDownStatusProcessor: &status.NoOpScaleDownStatusProcessor{},
Expand Down
33 changes: 19 additions & 14 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,10 @@ var (
regional = flag.Bool("regional", false, "Cluster is regional.")
newPodScaleUpDelay = flag.Duration("new-pod-scale-up-delay", 0*time.Second, "Pods less than this old will not be considered for scale-up.")

ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group")
balancingIgnoreLabelsFlag = multiStringFlag("balancing-ignore-label", "Specifies a label to ignore in addition to the basic and cloud-provider set of labels when comparing if two node groups are similar")
awsUseStaticInstanceList = flag.Bool("aws-use-static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only")
enableProfiling = flag.Bool("profiling", false, "Is debug/pprof endpoint enabled")
)

func createAutoscalingOptions() config.AutoscalingOptions {
Expand Down Expand Up @@ -235,6 +236,7 @@ func createAutoscalingOptions() config.AutoscalingOptions {
Regional: *regional,
NewPodScaleUpDelay: *newPodScaleUpDelay,
IgnoredTaints: *ignoreTaintsFlag,
BalancingExtraIgnoredLabels: *balancingIgnoreLabelsFlag,
KubeConfigPath: *kubeConfigFile,
NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout,
AWSUseStaticInstanceList: *awsUseStaticInstanceList,
Expand Down Expand Up @@ -289,21 +291,24 @@ func buildAutoscaler() (core.Autoscaler, error) {
kubeClient := createKubeClient(getKubeConfig())
eventsKubeClient := createKubeClient(getKubeConfig())

processors := ca_processors.DefaultProcessors()
processors.PodListProcessor = core.NewFilterOutSchedulablePodListProcessor()
if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName {
processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
Comparator: nodegroupset.CreateAzureNodeInfoComparator()}
} else if autoscalingOptions.CloudProviderName == cloudprovider.AwsProviderName {
processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
Comparator: nodegroupset.CreateAwsNodeInfoComparator()}
}

opts := core.AutoscalerOptions{
AutoscalingOptions: autoscalingOptions,
KubeClient: kubeClient,
EventsKubeClient: eventsKubeClient,
Processors: processors,
}

opts.Processors = ca_processors.DefaultProcessors()
opts.Processors.PodListProcessor = core.NewFilterOutSchedulablePodListProcessor()

nodeInfoComparatorBuilder := nodegroupset.CreateGenericNodeInfoComparator
if autoscalingOptions.CloudProviderName == cloudprovider.AzureProviderName {
nodeInfoComparatorBuilder = nodegroupset.CreateAzureNodeInfoComparator
} else if autoscalingOptions.CloudProviderName == cloudprovider.AwsProviderName {
nodeInfoComparatorBuilder = nodegroupset.CreateAwsNodeInfoComparator
}

opts.Processors.NodeGroupSetProcessor = &nodegroupset.BalancingNodeGroupSetProcessor{
Comparator: nodeInfoComparatorBuilder(autoscalingOptions.BalancingExtraIgnoredLabels),
}

// This metric should be published only once.
Expand Down
9 changes: 8 additions & 1 deletion cluster-autoscaler/processors/nodegroupset/aws_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,10 @@ import (
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"
)

func CreateAwsNodeInfoComparator() NodeInfoComparator {
// CreateAwsNodeInfoComparator returns a comparator that checks if two nodes should be considered
// part of the same NodeGroupSet. This is true if they match usual conditions checked by IsCloudProviderNodeInfoSimilar,
// even if they have different AWS-specific labels.
func CreateAwsNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
awsIgnoredLabels := map[string]bool{
"alpha.eksctl.io/instance-id": true, // this is a label used by eksctl to identify instances.
"alpha.eksctl.io/nodegroup-name": true, // this is a label used by eksctl to identify "node group" names.
Expand All @@ -33,6 +36,10 @@ func CreateAwsNodeInfoComparator() NodeInfoComparator {
awsIgnoredLabels[k] = v
}

for _, k := range extraIgnoredLabels {
awsIgnoredLabels[k] = true
}

return func(n1, n2 *schedulernodeinfo.NodeInfo) bool {
return IsCloudProviderNodeInfoSimilar(n1, n2, awsIgnoredLabels)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,18 @@ func nodesFromSameAzureNodePool(n1, n2 *schedulernodeinfo.NodeInfo) bool {
return n1AzureNodePool != "" && n1AzureNodePool == n2AzureNodePool
}

// Returned NodeInfoComparator compares if two nodes should be considered part of the
// same NodeGroupSet. This is true if they either belong to the same Azure agentpool
// CreateAzureNodeInfoComparator returns a comparator that checks if two nodes should be considered
// part of the same NodeGroupSet. This is true if they either belong to the same Azure agentpool
// or match usual conditions checked by IsCloudProviderNodeInfoSimilar, even if they have different agentpool labels.
func CreateAzureNodeInfoComparator() NodeInfoComparator {
func CreateAzureNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
azureIgnoredLabels := make(map[string]bool)
for k, v := range BasicIgnoredLabels {
azureIgnoredLabels[k] = v
}
azureIgnoredLabels[AzureNodepoolLabel] = true
for _, k := range extraIgnoredLabels {
azureIgnoredLabels[k] = true
}

return func(n1, n2 *schedulernodeinfo.NodeInfo) bool {
if nodesFromSameAzureNodePool(n1, n2) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
)

func TestIsAzureNodeInfoSimilar(t *testing.T) {
comparator := CreateAzureNodeInfoComparator()
comparator := CreateAzureNodeInfoComparator([]string{"example.com/ready"})
n1 := BuildTestNode("node1", 1000, 2000)
n1.ObjectMeta.Labels["test-label"] = "test-value"
n1.ObjectMeta.Labels["character"] = "thing"
Expand Down Expand Up @@ -62,15 +62,21 @@ func TestIsAzureNodeInfoSimilar(t *testing.T) {
n1.ObjectMeta.Labels["agentpool"] = "foo"
n2.ObjectMeta.Labels["agentpool"] = "bar"
checkNodesSimilar(t, n1, n2, comparator, true)
// Custom label
n1.ObjectMeta.Labels["example.com/ready"] = "true"
n2.ObjectMeta.Labels["example.com/ready"] = "false"
checkNodesSimilar(t, n1, n2, comparator, true)
}

func TestFindSimilarNodeGroupsAzureBasic(t *testing.T) {
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator()}
basicSimilarNodeGroupsTest(t, processor)
context := &context.AutoscalingContext{}
ni1, ni2, ni3 := buildBasicNodeGroups(context)
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{})}
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}

func TestFindSimilarNodeGroupsAzureByLabel(t *testing.T) {
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator()}
processor := &BalancingNodeGroupSetProcessor{Comparator: CreateAzureNodeInfoComparator([]string{})}
context := &context.AutoscalingContext{}

n1 := BuildTestNode("n1", 1000, 1000)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func (b *BalancingNodeGroupSetProcessor) FindSimilarNodeGroups(context *context.
}
comparator := b.Comparator
if comparator == nil {
panic("BalancingNodeGroupSetProcessor comparator not set")
klog.Fatal("BalancingNodeGroupSetProcessor comparator not set")
}
if comparator(nodeInfo, ngNodeInfo) {
result = append(result, ng)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,17 @@ package nodegroupset
import (
"testing"

schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
testprovider "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/test"
"k8s.io/autoscaler/cluster-autoscaler/context"
. "k8s.io/autoscaler/cluster-autoscaler/utils/test"
schedulernodeinfo "k8s.io/kubernetes/pkg/scheduler/nodeinfo"

"github.com/stretchr/testify/assert"
)

func basicSimilarNodeGroupsTest(t *testing.T, processor NodeGroupSetProcessor) {
context := &context.AutoscalingContext{}

func buildBasicNodeGroups(context *context.AutoscalingContext) (*schedulernodeinfo.NodeInfo, *schedulernodeinfo.NodeInfo, *schedulernodeinfo.NodeInfo) {
n1 := BuildTestNode("n1", 1000, 1000)
n2 := BuildTestNode("n2", 1000, 1000)
n3 := BuildTestNode("n3", 2000, 2000)
Expand All @@ -49,35 +48,71 @@ func basicSimilarNodeGroupsTest(t *testing.T, processor NodeGroupSetProcessor) {
ni3 := schedulernodeinfo.NewNodeInfo()
ni3.SetNode(n3)

context.CloudProvider = provider
return ni1, ni2, ni3
}

func basicSimilarNodeGroupsTest(
t *testing.T,
context *context.AutoscalingContext,
processor NodeGroupSetProcessor,
ni1 *schedulernodeinfo.NodeInfo,
ni2 *schedulernodeinfo.NodeInfo,
ni3 *schedulernodeinfo.NodeInfo,
) {
nodeInfosForGroups := map[string]*schedulernodeinfo.NodeInfo{
"ng1": ni1, "ng2": ni2, "ng3": ni3,
}

ng1, _ := provider.NodeGroupForNode(n1)
ng2, _ := provider.NodeGroupForNode(n2)
ng3, _ := provider.NodeGroupForNode(n3)
context.CloudProvider = provider
ng1, _ := context.CloudProvider.NodeGroupForNode(ni1.Node())
ng2, _ := context.CloudProvider.NodeGroupForNode(ni2.Node())
ng3, _ := context.CloudProvider.NodeGroupForNode(ni3.Node())

similar, err := processor.FindSimilarNodeGroups(context, ng1, nodeInfosForGroups)
assert.NoError(t, err)
assert.Equal(t, similar, []cloudprovider.NodeGroup{ng2})
assert.Equal(t, []cloudprovider.NodeGroup{ng2}, similar)

similar, err = processor.FindSimilarNodeGroups(context, ng2, nodeInfosForGroups)
assert.NoError(t, err)
assert.Equal(t, similar, []cloudprovider.NodeGroup{ng1})
assert.Equal(t, []cloudprovider.NodeGroup{ng1}, similar)

similar, err = processor.FindSimilarNodeGroups(context, ng3, nodeInfosForGroups)
assert.NoError(t, err)
assert.Equal(t, similar, []cloudprovider.NodeGroup{})
assert.Equal(t, []cloudprovider.NodeGroup{}, similar)
}

func TestFindSimilarNodeGroups(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor()
basicSimilarNodeGroupsTest(t, processor)
context := &context.AutoscalingContext{}
ni1, ni2, ni3 := buildBasicNodeGroups(context)
processor := NewDefaultNodeGroupSetProcessor([]string{})
basicSimilarNodeGroupsTest(t, context, processor, ni1, ni2, ni3)
}

// TestFindSimilarNodeGroupsCustomLabels runs the shared similar-node-group
// assertions after giving the first two node infos different values for a
// label that is passed to the processor as an extra ignored label.
func TestFindSimilarNodeGroupsCustomLabels(t *testing.T) {
	// The same key is used both on the nodes and in the ignore list.
	const customLabel = "example.com/ready"

	ctx := &context.AutoscalingContext{}
	ni1, ni2, ni3 := buildBasicNodeGroups(ctx)

	// Make the two node infos disagree on the custom label only.
	ni1.Node().Labels[customLabel] = "true"
	ni2.Node().Labels[customLabel] = "false"

	processor := NewDefaultNodeGroupSetProcessor([]string{customLabel})
	basicSimilarNodeGroupsTest(t, ctx, processor, ni1, ni2, ni3)
}

// TestFindSimilarNodeGroupsCustomComparator runs the shared similar-node-group
// assertions against a BalancingNodeGroupSetProcessor whose Comparator is a
// hand-written function that decides similarity from node names alone.
func TestFindSimilarNodeGroupsCustomComparator(t *testing.T) {
	ctx := &context.AutoscalingContext{}
	ni1, ni2, ni3 := buildBasicNodeGroups(ctx)

	// pairedByName accepts exactly the (n1, n2) pair, in either order.
	pairedByName := func(n1, n2 *schedulernodeinfo.NodeInfo) bool {
		switch n1.Node().Name {
		case "n1":
			return n2.Node().Name == "n2"
		case "n2":
			return n2.Node().Name == "n1"
		}
		return false
	}

	processor := &BalancingNodeGroupSetProcessor{Comparator: pairedByName}
	basicSimilarNodeGroupsTest(t, ctx, processor, ni1, ni2, ni3)
}

func TestBalanceSingleGroup(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor()
processor := NewDefaultNodeGroupSetProcessor([]string{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand All @@ -97,7 +132,7 @@ func TestBalanceSingleGroup(t *testing.T) {
}

func TestBalanceUnderMaxSize(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor()
processor := NewDefaultNodeGroupSetProcessor([]string{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand Down Expand Up @@ -147,7 +182,7 @@ func TestBalanceUnderMaxSize(t *testing.T) {
}

func TestBalanceHittingMaxSize(t *testing.T) {
processor := NewDefaultNodeGroupSetProcessor()
processor := NewDefaultNodeGroupSetProcessor([]string{})
context := &context.AutoscalingContext{}

provider := testprovider.NewTestCloudProvider(nil, nil)
Expand Down
14 changes: 11 additions & 3 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,19 @@ func compareLabels(nodes []*schedulernodeinfo.NodeInfo, ignoredLabels map[string
}
return true
}

// CreateGenericNodeInfoComparator returns a generic comparator that checks for node group similarity
// based on a standard set of widely applicable ignored labels.
func CreateGenericNodeInfoComparator() NodeInfoComparator {
func CreateGenericNodeInfoComparator(extraIgnoredLabels []string) NodeInfoComparator {
genericIgnoredLabels := make(map[string]bool)
for k, v := range BasicIgnoredLabels {
genericIgnoredLabels[k] = v
}
for _, k := range extraIgnoredLabels {
genericIgnoredLabels[k] = true
}

return func(n1, n2 *schedulernodeinfo.NodeInfo) bool {
return IsCloudProviderNodeInfoSimilar(n1, n2, BasicIgnoredLabels)
return IsCloudProviderNodeInfoSimilar(n1, n2, genericIgnoredLabels)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,14 @@ func checkNodesSimilarWithPods(t *testing.T, n1, n2 *apiv1.Node, pods1, pods2 []
}

func TestIdenticalNodesSimilar(t *testing.T) {
comparator := CreateGenericNodeInfoComparator()
comparator := CreateGenericNodeInfoComparator([]string{})
n1 := BuildTestNode("node1", 1000, 2000)
n2 := BuildTestNode("node2", 1000, 2000)
checkNodesSimilar(t, n1, n2, comparator, true)
}

func TestNodesSimilarVariousRequirements(t *testing.T) {
comparator := CreateGenericNodeInfoComparator()
comparator := CreateGenericNodeInfoComparator([]string{})
n1 := BuildTestNode("node1", 1000, 2000)

// Different CPU capacity
Expand All @@ -74,7 +74,7 @@ func TestNodesSimilarVariousRequirements(t *testing.T) {
}

func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
comparator := CreateGenericNodeInfoComparator()
comparator := CreateGenericNodeInfoComparator([]string{})
n1 := BuildTestNode("node1", 1000, 2000)
p1 := BuildTestPod("pod1", 500, 1000)
p1.Spec.NodeName = "node1"
Expand All @@ -100,7 +100,7 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
}

func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
comparator := CreateGenericNodeInfoComparator()
comparator := CreateGenericNodeInfoComparator([]string{})
n1 := BuildTestNode("node1", 1000, MaxMemoryDifferenceInKiloBytes)

// Different memory capacity within tolerance
Expand All @@ -115,7 +115,7 @@ func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
}

func TestNodesSimilarVariousLabels(t *testing.T) {
comparator := CreateGenericNodeInfoComparator()
comparator := CreateGenericNodeInfoComparator([]string{"example.com/ready"})
n1 := BuildTestNode("node1", 1000, 2000)
n1.ObjectMeta.Labels["test-label"] = "test-value"
n1.ObjectMeta.Labels["character"] = "winnie the pooh"
Expand Down Expand Up @@ -147,4 +147,9 @@ func TestNodesSimilarVariousLabels(t *testing.T) {
n1.ObjectMeta.Labels["beta.kubernetes.io/fluentd-ds-ready"] = "true"
delete(n2.ObjectMeta.Labels, "beta.kubernetes.io/fluentd-ds-ready")
checkNodesSimilar(t, n1, n2, comparator, true)

// Different custom labels should not matter
n1.ObjectMeta.Labels["example.com/ready"] = "true"
n2.ObjectMeta.Labels["example.com/ready"] = "false"
checkNodesSimilar(t, n1, n2, comparator, true)
}
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ func (n *NoOpNodeGroupSetProcessor) BalanceScaleUpBetweenGroups(context *context
func (n *NoOpNodeGroupSetProcessor) CleanUp() {}

// NewDefaultNodeGroupSetProcessor creates an instance of NodeGroupSetProcessor.
func NewDefaultNodeGroupSetProcessor() NodeGroupSetProcessor {
func NewDefaultNodeGroupSetProcessor(ignoredLabels []string) NodeGroupSetProcessor {
return &BalancingNodeGroupSetProcessor{
Comparator: CreateGenericNodeInfoComparator(),
Comparator: CreateGenericNodeInfoComparator(ignoredLabels),
}
}
2 changes: 1 addition & 1 deletion cluster-autoscaler/processors/processors.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ func DefaultProcessors() *AutoscalingProcessors {
return &AutoscalingProcessors{
PodListProcessor: pods.NewDefaultPodListProcessor(),
NodeGroupListProcessor: nodegroups.NewDefaultNodeGroupListProcessor(),
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor(),
NodeGroupSetProcessor: nodegroupset.NewDefaultNodeGroupSetProcessor([]string{}),
ScaleUpStatusProcessor: status.NewDefaultScaleUpStatusProcessor(),
ScaleDownNodeProcessor: nodes.NewPreFilteringScaleDownNodeProcessor(),
ScaleDownStatusProcessor: status.NewDefaultScaleDownStatusProcessor(),
Expand Down

0 comments on commit 8313e96

Please sign in to comment.