Skip to content

Commit

Permalink
Introduce MultiCIDRRangeAllocator
Browse files Browse the repository at this point in the history
MultiCIDRRangeAllocator is a new Range Allocator which makes using
multiple ClusterCIDRs possible. It consists of two controllers, one for
reconciling the ClusterCIDR API objects and the other for allocating
Pod CIDRs to the nodes.

The allocation is based on the rules defined in
https://github.com/kubernetes/enhancements/tree/master/keps/sig-network/2593-multiple-cluster-cidrs
  • Loading branch information
sarveshr7 committed Aug 2, 2022
1 parent ad1ff5e commit 14d9ad0
Show file tree
Hide file tree
Showing 15 changed files with 3,182 additions and 73 deletions.
1 change: 1 addition & 0 deletions cmd/cloud-controller-manager/.import-restrictions
Expand Up @@ -39,4 +39,5 @@ rules:
- k8s.io/kubernetes/pkg/util/taints
- k8s.io/kubernetes/pkg/proxy/util
- k8s.io/kubernetes/pkg/proxy/util/testing
- k8s.io/kubernetes/pkg/util/slice
- k8s.io/kubernetes/pkg/util/sysctl
1 change: 1 addition & 0 deletions cmd/cloud-controller-manager/nodeipamcontroller.go
Expand Up @@ -122,6 +122,7 @@ func startNodeIpamController(initContext app.ControllerInitContext, ccmConfig *c

nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
ctx.InformerFactory.Core().V1().Nodes(),
ctx.InformerFactory.Networking().V1alpha1().ClusterCIDRs(),
cloud,
ctx.ClientBuilder.ClientOrDie(initContext.ClientName),
clusterCIDRs,
Expand Down
1 change: 1 addition & 0 deletions cmd/kube-controller-manager/app/core.go
Expand Up @@ -155,6 +155,7 @@ func startNodeIpamController(ctx context.Context, controllerContext ControllerCo

nodeIpamController, err := nodeipamcontroller.NewNodeIpamController(
controllerContext.InformerFactory.Core().V1().Nodes(),
controllerContext.InformerFactory.Networking().V1alpha1().ClusterCIDRs(),
controllerContext.Cloud,
controllerContext.ClientBuilder.ClientOrDie("node-controller"),
clusterCIDRs,
Expand Down
11 changes: 11 additions & 0 deletions hack/local-up-cluster.sh
Expand Up @@ -37,6 +37,10 @@ FAIL_SWAP_ON=${FAIL_SWAP_ON:-"false"}
# Name of the dns addon, eg: "kube-dns" or "coredns"
DNS_ADDON=${DNS_ADDON:-"coredns"}
CLUSTER_CIDR=${CLUSTER_CIDR:-10.1.0.0/16}
ALLOCATE_NODE_CIDRS=${ALLOCATE_NODE_CIDRS:-false}
RANGE_ALLOCATOR=${RANGE_ALLOCATOR:-"RangeAllocator"}
DUALSTACK_ENABLED=${DUALSTACK_ENABLED:-"false"}

SERVICE_CLUSTER_IP_RANGE=${SERVICE_CLUSTER_IP_RANGE:-10.0.0.0/24}
FIRST_SERVICE_CLUSTER_IP=${FIRST_SERVICE_CLUSTER_IP:-10.0.0.1}
# if enabled, must set CGROUP_ROOT
Expand Down Expand Up @@ -631,6 +635,12 @@ function start_controller_manager {
cloud_config_arg+=("--cloud-config=${CLOUD_CONFIG}")
fi

node_ipam_arg=("--allocate-node-cidrs=${ALLOCATE_NODE_CIDRS}" "--cluster-cidr=${CLUSTER_CIDR}")
node_ipam_arg+=("--cidr-allocator-type=${RANGE_ALLOCATOR}")
if [ "${DUALSTACK_ENABLED}" == "true" ]; then
node_ipam_arg+=("--node-cidr-mask-size-ipv4=24" "--node-cidr-mask-size-ipv6=120")
fi

CTLRMGR_LOG=${LOG_DIR}/kube-controller-manager.log
${CONTROLPLANE_SUDO} "${GO_OUT}/kube-controller-manager" \
--v="${LOG_LEVEL}" \
Expand All @@ -644,6 +654,7 @@ function start_controller_manager {
--pvclaimbinder-sync-period="${CLAIM_BINDER_SYNC_PERIOD}" \
--feature-gates="${FEATURE_GATES}" \
"${cloud_config_arg[@]}" \
"${node_ipam_arg[@]}" \
--authentication-kubeconfig "${CERT_DIR}"/controller.kubeconfig \
--authorization-kubeconfig "${CERT_DIR}"/controller.kubeconfig \
--kubeconfig "${CERT_DIR}"/controller.kubeconfig \
Expand Down
74 changes: 66 additions & 8 deletions pkg/controller/nodeipam/ipam/cidr_allocator.go
Expand Up @@ -22,16 +22,17 @@ import (
"net"
"time"

"k8s.io/klog/v2"

"k8s.io/api/core/v1"
networkingv1alpha1 "k8s.io/api/networking/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/wait"
informers "k8s.io/client-go/informers/core/v1"
networkinginformers "k8s.io/client-go/informers/networking/v1alpha1"
clientset "k8s.io/client-go/kubernetes"
cloudprovider "k8s.io/cloud-provider"
"k8s.io/klog/v2"
)

// CIDRAllocatorType is the type of the allocator to use.
Expand All @@ -41,6 +42,9 @@ const (
// RangeAllocatorType is the allocator that uses an internal CIDR
// range allocator to do node CIDR range allocations.
RangeAllocatorType CIDRAllocatorType = "RangeAllocator"
// MultiCIDRRangeAllocatorType is the allocator that uses an internal CIDR
// range allocator to do node CIDR range allocations from multiple
// ClusterCIDRs.
MultiCIDRRangeAllocatorType CIDRAllocatorType = "MultiCIDRRangeAllocator"
// CloudAllocatorType is the allocator that uses cloud platform
// support to do node CIDR range allocations.
CloudAllocatorType CIDRAllocatorType = "CloudAllocator"
Expand Down Expand Up @@ -87,7 +91,7 @@ type CIDRAllocator interface {
// CIDR if it doesn't currently have one or mark the CIDR as used if
// the node already have one.
AllocateOrOccupyCIDR(node *v1.Node) error
// ReleaseCIDR releases the CIDR of the removed node
// ReleaseCIDR releases the CIDR of the removed node.
ReleaseCIDR(node *v1.Node) error
// Run starts all the working logic of the allocator.
Run(stopCh <-chan struct{})
Expand All @@ -96,18 +100,25 @@ type CIDRAllocator interface {
// CIDRAllocatorParams holds the parameters required for creating a new
// CIDR range allocator.
type CIDRAllocatorParams struct {
// ClusterCIDRs is the list of cluster CIDRs.
ClusterCIDRs []*net.IPNet
// ServiceCIDR is the primary service CIDR for the cluster.
ServiceCIDR *net.IPNet
// SecondaryServiceCIDR is the secondary service CIDR for the cluster.
SecondaryServiceCIDR *net.IPNet
// NodeCIDRMaskSizes is the list of node CIDR mask sizes.
NodeCIDRMaskSizes []int
}

// nodeReservedCIDRs holds the reservation info for a node.
// CIDRs are reserved first, then the node resource is patched with them.
type nodeReservedCIDRs struct {
// allocatedCIDRs are the CIDRs reserved for the node.
allocatedCIDRs []*net.IPNet
// nodeName is the name of the node the reservation belongs to.
nodeName string
}

// New creates a new CIDR range allocator.
func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInformer informers.NodeInformer, clusterCIDRInformer networkinginformers.ClusterCIDRInformer, allocatorType CIDRAllocatorType, allocatorParams CIDRAllocatorParams) (CIDRAllocator, error) {
nodeList, err := listNodes(kubeClient)
if err != nil {
return nil, err
Expand All @@ -116,6 +127,12 @@ func New(kubeClient clientset.Interface, cloud cloudprovider.Interface, nodeInfo
switch allocatorType {
case RangeAllocatorType:
return NewCIDRRangeAllocator(kubeClient, nodeInformer, allocatorParams, nodeList)
case MultiCIDRRangeAllocatorType:
cccList, err := listClusterCIDRs(kubeClient)
if err != nil {
return nil, err
}
return NewMultiCIDRRangeAllocator(kubeClient, nodeInformer, clusterCIDRInformer, allocatorParams, nodeList, cccList, nil)
case CloudAllocatorType:
return NewCloudCIDRAllocator(kubeClient, cloud, nodeInformer)
default:
Expand Down Expand Up @@ -144,3 +161,44 @@ func listNodes(kubeClient clientset.Interface) (*v1.NodeList, error) {
}
return nodeList, nil
}

// listClusterCIDRs lists all ClusterCIDR objects through kubeClient.
//
// The list call is retried with exponential backoff because the apiserver
// might not be up yet. If every attempt fails, a non-nil error wrapping the
// last list failure is returned; this error causes the controller manager to
// restart.
func listClusterCIDRs(kubeClient clientset.Interface) (*networkingv1alpha1.ClusterCIDRList, error) {
	var (
		clusterCIDRList *networkingv1alpha1.ClusterCIDRList
		// lastErr remembers the most recent list failure so that the root
		// cause is not lost when the backoff is exhausted.
		lastErr error
	)
	startTimestamp := time.Now()

	// Start at 2s and multiply the delay by 1.6 after each failed attempt,
	// for at most 11 attempts.
	backoff := wait.Backoff{
		Duration: 2 * time.Second,
		Factor:   1.6,
		Steps:    11,
	}

	pollErr := wait.ExponentialBackoff(backoff, func() (bool, error) {
		clusterCIDRList, lastErr = kubeClient.NetworkingV1alpha1().ClusterCIDRs().List(context.TODO(), metav1.ListOptions{
			FieldSelector: fields.Everything().String(),
			LabelSelector: labels.Everything().String(),
		})
		if lastErr != nil {
			// Returning (false, nil) keeps the backoff loop retrying
			// instead of aborting on the first failure.
			klog.Errorf("Failed to list all clusterCIDRs: %v", lastErr)
			return false, nil
		}
		return true, nil
	})
	if pollErr != nil {
		klog.Errorf("Failed to list clusterCIDRs (after %v)", time.Since(startTimestamp))

		// Prefer the concrete list failure over the generic backoff error.
		cause := lastErr
		if cause == nil {
			cause = pollErr
		}
		return nil, fmt.Errorf("failed to list all clusterCIDRs in %v, cannot proceed without updating CIDR map: %w",
			apiserverStartupGracePeriod, cause)
	}
	return clusterCIDRList, nil
}

// ipnetToStringList renders every network in inCIDRs in its CIDR string
// form, preserving the input order. The result is always a non-nil slice
// with one entry per input element.
func ipnetToStringList(inCIDRs []*net.IPNet) []string {
	cidrStrings := make([]string, 0, len(inCIDRs))
	for i := range inCIDRs {
		cidrStrings = append(cidrStrings, inCIDRs[i].String())
	}
	return cidrStrings
}

0 comments on commit 14d9ad0

Please sign in to comment.