/
cidr_allocator.go
173 lines (149 loc) · 6.29 KB
/
cidr_allocator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/*
Copyright 2016 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package ipam
import (
"context"
"fmt"
"net"
"time"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
utilfeature "k8s.io/apiserver/pkg/util/feature"
informers "k8s.io/client-go/informers/core/v1"
networkinginformers "k8s.io/client-go/informers/networking/v1alpha1"
clientset "k8s.io/client-go/kubernetes"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/features"
)
// CIDRAllocatorType is the type of the allocator to use.
type CIDRAllocatorType string
const (
// RangeAllocatorType is the allocator that uses an internal CIDR
// range allocator to do node CIDR range allocations.
RangeAllocatorType CIDRAllocatorType = "RangeAllocator"
// MultiCIDRRangeAllocatorType is the allocator that uses an internal CIDR
// range allocator to do node CIDR range allocations.
MultiCIDRRangeAllocatorType CIDRAllocatorType = "MultiCIDRRangeAllocator"
// CloudAllocatorType is the allocator that uses cloud platform
// support to do node CIDR range allocations.
CloudAllocatorType CIDRAllocatorType = "CloudAllocator"
// IPAMFromClusterAllocatorType uses the ipam controller sync'ing the node
// CIDR range allocations from the cluster to the cloud.
IPAMFromClusterAllocatorType = "IPAMFromCluster"
// IPAMFromCloudAllocatorType uses the ipam controller sync'ing the node
// CIDR range allocations from the cloud to the cluster.
IPAMFromCloudAllocatorType = "IPAMFromCloud"
)
// TODO: figure out the good setting for those constants.
const (
// The amount of time the nodecontroller polls on the list nodes endpoint.
apiserverStartupGracePeriod = 10 * time.Minute
// The no. of NodeSpec updates NC can process concurrently.
cidrUpdateWorkers = 30
// The max no. of NodeSpec updates that can be enqueued.
cidrUpdateQueueSize = 5000
// cidrUpdateRetries is the no. of times a NodeSpec update will be retried before dropping it.
cidrUpdateRetries = 3
// updateRetryTimeout is the time to wait before requeing a failed node for retry
updateRetryTimeout = 250 * time.Millisecond
// maxUpdateRetryTimeout is the maximum amount of time between timeouts.
maxUpdateRetryTimeout = 5 * time.Second
// updateMaxRetries is the max retries for a failed node
updateMaxRetries = 10
)
// nodePollInterval is used in listing node
// This is a variable instead of a const to enable testing.
var nodePollInterval = 10 * time.Second
// CIDRAllocator is an interface implemented by things that know how
// to allocate/occupy/recycle CIDR for nodes.
type CIDRAllocator interface {
// AllocateOrOccupyCIDR looks at the given node, assigns it a valid
// CIDR if it doesn't currently have one or mark the CIDR as used if
// the node already have one.
AllocateOrOccupyCIDR(node *v1.Node) error
// ReleaseCIDR releases the CIDR of the removed node.
ReleaseCIDR(node *v1.Node) error
// Run starts all the working logic of the allocator.
Run(stopCh <-chan struct{})
}
// CIDRAllocatorParams is parameters that's required for creating new
// cidr range allocator.
type CIDRAllocatorParams struct {
// ClusterCIDRs is list of cluster cidrs.
ClusterCIDRs []*net.IPNet
// ServiceCIDR is primary service cidr for cluster.
ServiceCIDR *net.IPNet
// SecondaryServiceCIDR is secondary service cidr for cluster.
SecondaryServiceCIDR *net.IPNet
// NodeCIDRMaskSizes is list of node cidr mask sizes.
NodeCIDRMaskSizes []int
}
// CIDRs are reserved, then node resource is patched with them.
// nodeReservedCIDRs holds the reservation info for a node.
type nodeReservedCIDRs struct {
allocatedCIDRs []*net.IPNet
nodeName string
}
// New creates a new CIDR range allocator.
func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
nodeList, err := listNodes(kubeClient)
if err != nil {
return nil, err
}
switch allocatorType {
case RangeAllocatorType:
return NewCIDRRangeAllocator(kubeClient, nodeInformer, allocatorParams, nodeList)
case MultiCIDRRangeAllocatorType:
if !utilfeature.DefaultFeatureGate.Enabled(features.MultiCIDRRangeAllocator) {
return nil, fmt.Errorf("invalid CIDR allocator type: %v, feature gate %v must be enabled", allocatorType, features.MultiCIDRRangeAllocator)
}
return NewMultiCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, nil)
case CloudAllocatorType:
return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer)
default:
return nil, fmt.Errorf("invalid CIDR allocator type: %v", allocatorType)
}
}
func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
var nodeList *v1.NodeList
// We must poll because apiserver might not be up. This error causes
// controller manager to restart.
if pollErr := wait.Poll(nodePollInterval, apiserverStartupGracePeriod, func() (bool, error) {
var err error
nodeList, err = kubeClient.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{
FieldSelector: fields.Everything().String(),
LabelSelector: labels.Everything().String(),
})
if err != nil {
klog.Errorf("Failed to list all nodes: %v", err)
return false, nil
}
return true, nil
}); pollErr != nil {
return nil, fmt.Errorf("failed to list all nodes in %v, cannot proceed without updating CIDR map",
apiserverStartupGracePeriod)
}
return nodeList, nil
}
// ipnetToStringList converts a slice of net.IPNet into a list of CIDR in string format
func ipnetToStringList(inCIDRs []*net.IPNet) []string {
outCIDRs := make([]string, len(inCIDRs))
for idx, inCIDR := range inCIDRs {
outCIDRs[idx] = inCIDR.String()
}
return outCIDRs
}