Skip to content

Commit

Permalink
Add a priority queue to implement MultiCIDR tie-breaks
Browse files Browse the repository at this point in the history
The Priority is determined as follows:
P0: ClusterCIDR with higher number of matching labels has highest
priority.
P1: ClusterCIDR having cidrSet with fewer allocatable Pod CIDRs has
higher priority.
P2: ClusterCIDR with a PerNodeMaskSize having fewer IPs has higher
priority.
P3: ClusterCIDR having label with lower alphanumeric value has higher
priority.
P4: ClusterCIDR with a cidrSet having a smaller IP address value has
higher priority.
  • Loading branch information
sarveshr7 committed Aug 5, 2022
1 parent b6392a4 commit 02d944d
Show file tree
Hide file tree
Showing 2 changed files with 310 additions and 0 deletions.
140 changes: 140 additions & 0 deletions pkg/controller/nodeipam/ipam/multi_cidr_priority_queue.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ipam

import (
"math"

cidrset "k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
)

// A PriorityQueue implementation based on https://pkg.go.dev/container/heap#example-package-PriorityQueue

// An PriorityQueueItem is something we manage in a priority queue.
type PriorityQueueItem struct {
clusterCIDR *cidrset.ClusterCIDR
// labelMatchCount is the first determinant of priority.
labelMatchCount int
// selectorString is a string representation of the labelSelector associated with the cidrSet.
selectorString string
// index is needed by update and is maintained by the heap.Interface methods.
index int // The index of the item in the heap.
}

// A PriorityQueue implements heap.Interface and holds PriorityQueueItems.
type PriorityQueue []*PriorityQueueItem

func (pq PriorityQueue) Len() int { return len(pq) }

// Less compares the priority queue items, to store in a min heap.
// Less(i,j) == true denotes i has higher priority than j.
func (pq PriorityQueue) Less(i, j int) bool {
if pq[i].labelMatchCount != pq[j].labelMatchCount {
// P0: CidrSet with higher number of matching labels has the highest priority.
return pq[i].labelMatchCount > pq[j].labelMatchCount
}

// If the count of matching labels is equal, compare the max allocatable pod CIDRs.
if pq[i].maxAllocatable() != pq[j].maxAllocatable() {
// P1: CidrSet with fewer allocatable pod CIDRs has higher priority.
return pq[i].maxAllocatable() < pq[j].maxAllocatable()
}

// If the value of allocatable pod CIDRs is equal, compare the node mask size.
if pq[i].nodeMaskSize() != pq[j].nodeMaskSize() {
// P2: CidrSet with a PerNodeMaskSize having fewer IPs has higher priority.
// For example, `27` (32 IPs) picked before `25` (128 IPs).
return pq[i].nodeMaskSize() > pq[j].nodeMaskSize()
}

// If the per node mask size are equal compare the CIDR labels.
if pq[i].selectorString != pq[j].selectorString {
// P3: CidrSet having label with lower alphanumeric value has higher priority.
return pq[i].selectorString < pq[j].selectorString
}

// P4: CidrSet having an alpha-numerically smaller IP address value has a higher priority.
return pq[i].cidrLabel() < pq[j].cidrLabel()
}

func (pq PriorityQueue) Swap(i, j int) {
pq[i], pq[j] = pq[j], pq[i]
pq[i].index = i
pq[j].index = j
}

func (pq *PriorityQueue) Push(x interface{}) {
n := len(*pq)
if item, ok := x.(*PriorityQueueItem); ok {
item.index = n
*pq = append(*pq, item)
}
}

func (pq *PriorityQueue) Pop() interface{} {
old := *pq
n := len(old)
item := old[n-1]
old[n-1] = nil // avoid memory leak.
item.index = -1 // for safety.
*pq = old[0 : n-1]
return item
}

// maxAllocatable computes the minimum value of the MaxCIDRs for a ClusterCIDR.
// It compares the MaxCIDRs for each CIDR family and returns the minimum.
// e.g. IPv4 - 10.0.0.0/16 PerNodeMaskSize: 24 MaxCIDRs = 256
// IPv6 - ff:ff::/120 PerNodeMaskSize: 120 MaxCIDRs = 1
// MaxAllocatable for this ClusterCIDR = 1
func (pqi *PriorityQueueItem) maxAllocatable() int {
ipv4Allocatable := math.MaxInt
ipv6Allocatable := math.MaxInt

if pqi.clusterCIDR.IPv4CIDRSet != nil {
ipv4Allocatable = pqi.clusterCIDR.IPv4CIDRSet.MaxCIDRs
}

if pqi.clusterCIDR.IPv6CIDRSet != nil {
ipv6Allocatable = pqi.clusterCIDR.IPv6CIDRSet.MaxCIDRs
}

if ipv4Allocatable < ipv6Allocatable {
return ipv4Allocatable
}

return ipv6Allocatable
}

// nodeMaskSize returns IPv4 NodeMaskSize if present, else returns IPv6 NodeMaskSize.
// Note the requirement: 32 - IPv4 NodeMaskSize == 128 - IPv6 NodeMaskSize
// Due to the above requirement it does not matter which NodeMaskSize we compare.
func (pqi *PriorityQueueItem) nodeMaskSize() int {
if pqi.clusterCIDR.IPv4CIDRSet != nil {
return pqi.clusterCIDR.IPv4CIDRSet.NodeMaskSize
}

return pqi.clusterCIDR.IPv6CIDRSet.NodeMaskSize
}

// cidrLabel returns IPv4 CIDR if present, else returns IPv6 CIDR.
func (pqi *PriorityQueueItem) cidrLabel() string {
if pqi.clusterCIDR.IPv4CIDRSet != nil {
return pqi.clusterCIDR.IPv4CIDRSet.Label
}

return pqi.clusterCIDR.IPv6CIDRSet.Label
}
170 changes: 170 additions & 0 deletions pkg/controller/nodeipam/ipam/multi_cidr_priority_queue_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ipam

import (
"container/heap"
"testing"

"k8s.io/kubernetes/pkg/controller/nodeipam/ipam/multicidrset"
utilnet "k8s.io/utils/net"
)

func createTestPriorityQueueItem(name, cidr, selectorString string, labelMatchCount, perNodeHostBits int) *PriorityQueueItem {
_, clusterCIDR, _ := utilnet.ParseCIDRSloppy(cidr)
cidrSet, _ := multicidrset.NewMultiCIDRSet(clusterCIDR, perNodeHostBits)

return &PriorityQueueItem{
clusterCIDR: &multicidrset.ClusterCIDR{
Name: name,
IPv4CIDRSet: cidrSet,
},
labelMatchCount: labelMatchCount,
selectorString: selectorString,
}
}

func TestPriorityQueue(t *testing.T) {

pqi1 := createTestPriorityQueueItem("cidr1", "192.168.0.0/16", "foo=bar,name=test1", 1, 8)
pqi2 := createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 2, 8)
pqi3 := createTestPriorityQueueItem("cidr3", "172.16.0.0/16", "foo=bar,name=test3", 2, 8)
pqi4 := createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6)
pqi5 := createTestPriorityQueueItem("cidr5", "10.1.2.0/26", "foo=bar,name=test5", 2, 6)
pqi6 := createTestPriorityQueueItem("cidr6", "10.1.3.0/26", "abc=bar,name=test4", 2, 6)

for _, testQueue := range []struct {
name string
items []*PriorityQueueItem
want *PriorityQueueItem
}{
{"Test queue with single item", []*PriorityQueueItem{pqi1}, pqi1},
{"Test queue with items having different labelMatchCount", []*PriorityQueueItem{pqi1, pqi2}, pqi2},
{"Test queue with items having same labelMatchCount, different max Allocatable Pod CIDRs", []*PriorityQueueItem{pqi1, pqi2, pqi3}, pqi2},
{"Test queue with items having same labelMatchCount, max Allocatable Pod CIDRs, different PerNodeMaskSize", []*PriorityQueueItem{pqi1, pqi2, pqi4}, pqi4},
{"Test queue with items having same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, different labels", []*PriorityQueueItem{pqi1, pqi2, pqi4, pqi5}, pqi4},
{"Test queue with items having same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, labels, different IP addresses", []*PriorityQueueItem{pqi1, pqi2, pqi4, pqi5, pqi6}, pqi4},
} {
pq := make(PriorityQueue, 0)
for _, pqi := range testQueue.items {
heap.Push(&pq, pqi)
}

got := heap.Pop(&pq)

if got != testQueue.want {
t.Errorf("Error, wanted: %+v, got: %+v", testQueue.want, got)
}
}
}

func TestLess(t *testing.T) {

for _, testQueue := range []struct {
name string
items []*PriorityQueueItem
want bool
}{
{
name: "different labelMatchCount, i higher priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr1", "192.168.0.0/16", "foo=bar,name=test1", 2, 8),
createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 1, 8),
},
want: true,
},
{
name: "different labelMatchCount, i lower priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr1", "192.168.0.0/16", "foo=bar,name=test1", 1, 8),
createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 2, 8),
},
want: false,
},
{
name: "same labelMatchCount, different max allocatable cidrs, i higher priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 2, 8),
createTestPriorityQueueItem("cidr3", "172.16.0.0/16", "foo=bar,name=test3", 2, 8),
},
want: true,
},
{
name: "same labelMatchCount, different max allocatable cidrs, i lower priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr2", "10.1.0.0/16", "foo=bar,name=test2", 2, 8),
createTestPriorityQueueItem("cidr3", "172.16.0.0/24", "foo=bar,name=test3", 2, 8),
},
want: false,
},
{
name: "same labelMatchCount, max allocatable cidrs, different PerNodeMaskSize i higher priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr2", "10.1.0.0/26", "foo=bar,name=test2", 2, 6),
createTestPriorityQueueItem("cidr4", "10.1.1.0/24", "abc=bar,name=test4", 2, 8),
},
want: true,
},
{
name: "same labelMatchCount, max allocatable cidrs, different PerNodeMaskSize i lower priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr2", "10.1.0.0/24", "foo=bar,name=test2", 2, 8),
createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6),
},
want: false,
},
{
name: "same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, different labels i higher priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6),
createTestPriorityQueueItem("cidr5", "10.1.2.0/26", "foo=bar,name=test5", 2, 6),
},
want: true,
},
{
name: "same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, different labels i lower priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "xyz=bar,name=test4", 2, 6),
createTestPriorityQueueItem("cidr5", "10.1.2.0/26", "foo=bar,name=test5", 2, 6),
},
want: false,
},
{
name: "same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, labels, different IP addresses i higher priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "abc=bar,name=test4", 2, 6),
createTestPriorityQueueItem("cidr6", "10.1.3.0/26", "abc=bar,name=test4", 2, 6),
},
want: true,
},
{
name: "same labelMatchCount, max Allocatable Pod CIDRs, PerNodeMaskSize, labels, different IP addresses i lower priority than j",
items: []*PriorityQueueItem{
createTestPriorityQueueItem("cidr4", "10.1.1.0/26", "xyz=bar,name=test4", 2, 6),
createTestPriorityQueueItem("cidr6", "10.0.3.0/26", "abc=bar,name=test4", 2, 6),
},
want: false,
},
} {
var pq PriorityQueue
pq = testQueue.items
got := pq.Less(0, 1)
if got != testQueue.want {
t.Errorf("Error, wanted: %v, got: %v\nTest %q \npq[0]: %+v \npq[1]: %+v ", testQueue.want, got, testQueue.name, pq[0], pq[1])
}
}
}

0 comments on commit 02d944d

Please sign in to comment.