Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG 1824215: Allow small tolerance on memory capacity when comparing nodegroups #152

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
32 changes: 17 additions & 15 deletions cluster-autoscaler/processors/nodegroupset/compare_nodegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ const (
// MaxFreeDifferenceRatio describes how free resources (allocatable - daemon and system pods)
// can differ between groups in the same NodeGroupSet
MaxFreeDifferenceRatio = 0.05
// MaxMemoryDifferenceInKiloBytes describes how much memory
// capacity can differ but still be considered equal.
MaxMemoryDifferenceInKiloBytes = 256000
// MaxCapacityMemoryDifferenceRatio describes how Node.Status.Capacity.Memory can differ between
// groups in the same NodeGroupSet
MaxCapacityMemoryDifferenceRatio = 0.015
)

// BasicIgnoredLabels define a set of basic labels that should be ignored when comparing the similarity
Expand All @@ -53,21 +53,25 @@ var BasicIgnoredLabels = map[string]bool{
// similar enough to be considered a part of a single NodeGroupSet.
type NodeInfoComparator func(n1, n2 *schedulernodeinfo.NodeInfo) bool

func compareResourceMapsWithTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
func resourceMapsWithinTolerance(resources map[apiv1.ResourceName][]resource.Quantity,
maxDifferenceRatio float64) bool {
for _, qtyList := range resources {
if len(qtyList) != 2 {
return false
}
larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
if larger-smaller > larger*maxDifferenceRatio {
if !resourceListWithinTolerance(qtyList, maxDifferenceRatio) {
return false
}
}
return true
}

func resourceListWithinTolerance(qtyList []resource.Quantity, maxDifferenceRatio float64) bool {
if len(qtyList) != 2 {
return false
}
larger := math.Max(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
smaller := math.Min(float64(qtyList[0].MilliValue()), float64(qtyList[1].MilliValue()))
return larger-smaller <= larger*maxDifferenceRatio
}

func compareLabels(nodes []*schedulernodeinfo.NodeInfo, ignoredLabels map[string]bool) bool {
labels := make(map[string][]string)
for _, node := range nodes {
Expand Down Expand Up @@ -123,9 +127,7 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL
}
switch kind {
case apiv1.ResourceMemory:
// For memory capacity we allow a small tolerance
memoryDifference := math.Abs(float64(qtyList[0].Value()) - float64(qtyList[1].Value()))
if memoryDifference > MaxMemoryDifferenceInKiloBytes {
if !resourceListWithinTolerance(qtyList, MaxCapacityMemoryDifferenceRatio) {
return false
}
default:
Expand All @@ -139,10 +141,10 @@ func IsCloudProviderNodeInfoSimilar(n1, n2 *schedulernodeinfo.NodeInfo, ignoredL
}

// For allocatable and free we allow resource quantities to be within a few % of each other
if !compareResourceMapsWithTolerance(allocatable, MaxAllocatableDifferenceRatio) {
if !resourceMapsWithinTolerance(allocatable, MaxAllocatableDifferenceRatio) {
return false
}
if !compareResourceMapsWithTolerance(free, MaxFreeDifferenceRatio) {
if !resourceMapsWithinTolerance(free, MaxFreeDifferenceRatio) {
return false
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,16 +97,56 @@ func TestNodesSimilarVariousRequirementsAndPods(t *testing.T) {
}

func TestNodesSimilarVariousMemoryRequirements(t *testing.T) {
n1 := BuildTestNode("node1", 1000, MaxMemoryDifferenceInKiloBytes)
n1 := BuildTestNode("node1", 1000, 1000)

// Different memory capacity within tolerance
n2 := BuildTestNode("node2", 1000, MaxMemoryDifferenceInKiloBytes)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes, resource.DecimalSI)
n2 := BuildTestNode("node2", 1000, 1000)
n2.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)+1, resource.DecimalSI)
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
n3 := BuildTestNode("node3", 1000, MaxMemoryDifferenceInKiloBytes)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(2*MaxMemoryDifferenceInKiloBytes+1, resource.DecimalSI)
n3 := BuildTestNode("node3", 1000, 1000)
n3.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(1000-(1000*MaxCapacityMemoryDifferenceRatio)-1, resource.DecimalSI)
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

func TestNodesSimilarVariousLargeMemoryRequirementsM5XLarge(t *testing.T) {
// Use realistic memory capacity (taken from real nodes)
// 15944120 KB ~= 16GiB (m5.xLarge)
q1 := resource.MustParse("16116152Ki")
q2 := resource.MustParse("15944120Ki")

n1 := BuildTestNode("node1", 1000, q1.Value())

// Different memory capacity within tolerance
// Value taken from another m5.xLarge in a different zone
n2 := BuildTestNode("node2", 1000, q2.Value())
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
// Value of q1 * 1.02
q3 := resource.MustParse("16438475Ki")
n3 := BuildTestNode("node3", 1000, q3.Value())
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

func TestNodesSimilarVariousLargeMemoryRequirementsM516XLarge(t *testing.T) {
// Use realistic memory capacity (taken from real nodes)
// 257217528 KB ~= 256GiB (m5.16xLarge)
q1 := resource.MustParse("259970052Ki")
q2 := resource.MustParse("257217528Ki")

n1 := BuildTestNode("node1", 1000, q1.Value())

// Different memory capacity within tolerance
// Value taken from another m5.xLarge in a different zone
n2 := BuildTestNode("node2", 1000, q2.Value())
checkNodesSimilar(t, n1, n2, IsNodeInfoSimilar, true)

// Different memory capacity exceeds tolerance
// Value of q1 * 1.02
q3 := resource.MustParse("265169453Ki")
n3 := BuildTestNode("node3", 1000, q3.Value())
checkNodesSimilar(t, n1, n3, IsNodeInfoSimilar, false)
}

Expand Down