Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #90218: Lazy initialization of network urls for GCE provider #90242: Avoid unnecessary GCE API calls for IP-alias calls #96864

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 14 additions & 5 deletions pkg/controller/nodeipam/ipam/adapter.go
Expand Up @@ -21,6 +21,7 @@ package ipam
import (
"context"
"encoding/json"
"fmt"
"net"

"k8s.io/klog"
Expand Down Expand Up @@ -60,8 +61,12 @@ func newAdapter(k8s clientset.Interface, cloud *gce.Cloud) *adapter {
return ret
}

func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error) {
cidrs, err := a.cloud.AliasRanges(types.NodeName(nodeName))
func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
if node.Spec.ProviderID == "" {
return nil, fmt.Errorf("node %s doesn't have providerID", node.Name)
}

cidrs, err := a.cloud.AliasRangesByProviderID(node.Spec.ProviderID)
if err != nil {
return nil, err
}
Expand All @@ -72,7 +77,7 @@ func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error
case 1:
break
default:
klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", nodeName, cidrs)
klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", node.Name, cidrs)
}

_, cidrRange, err := net.ParseCIDR(cidrs[0])
Expand All @@ -83,8 +88,12 @@ func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error
return cidrRange, nil
}

func (a *adapter) AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error {
return a.cloud.AddAliasToInstance(types.NodeName(nodeName), cidrRange)
func (a *adapter) AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error {
if node.Spec.ProviderID == "" {
return fmt.Errorf("node %s doesn't have providerID", node.Name)
}

return a.cloud.AddAliasToInstanceByProviderID(node.Spec.ProviderID, cidrRange)
}

func (a *adapter) Node(ctx context.Context, name string) (*v1.Node, error) {
Expand Down
5 changes: 4 additions & 1 deletion pkg/controller/nodeipam/ipam/cloud_cidr_allocator.go
Expand Up @@ -249,8 +249,11 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error {
klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDR: %v", nodeName, err)
return err
}
if node.Spec.ProviderID == "" {
return fmt.Errorf("node %s doesn't have providerID", nodeName)
}

cidrs, err := ca.cloud.AliasRanges(types.NodeName(nodeName))
cidrs, err := ca.cloud.AliasRangesByProviderID(node.Spec.ProviderID)
if err != nil {
nodeutil.RecordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable")
return fmt.Errorf("failed to allocate cidr: %v", err)
Expand Down
10 changes: 5 additions & 5 deletions pkg/controller/nodeipam/ipam/sync/sync.go
Expand Up @@ -43,9 +43,9 @@ const (
// cloudAlias is the interface to the cloud platform APIs.
type cloudAlias interface {
// Alias returns the IP alias for the node.
Alias(ctx context.Context, nodeName string) (*net.IPNet, error)
Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error)
// AddAlias adds an alias to the node.
AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error
AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error
}

// kubeAPI is the interface to the Kubernetes APIs.
Expand Down Expand Up @@ -204,7 +204,7 @@ func (op *updateOp) run(sync *NodeSync) error {
op.node = node
}

aliasRange, err := sync.cloudAlias.Alias(ctx, sync.nodeName)
aliasRange, err := sync.cloudAlias.Alias(ctx, op.node)
if err != nil {
klog.Errorf("Error getting cloud alias for node %q: %v", sync.nodeName, err)
return err
Expand Down Expand Up @@ -293,7 +293,7 @@ func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, nod
return err
}

if err := sync.cloudAlias.AddAlias(ctx, node.Name, aliasRange); err != nil {
if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", aliasRange, node.Name, err)
return err
}
Expand Down Expand Up @@ -325,7 +325,7 @@ func (op *updateOp) allocateRange(ctx context.Context, sync *NodeSync, node *v1.
// If addAlias returns a hard error, cidrRange will be leaked as there
// is no durable record of the range. The missing space will be
// recovered on the next restart of the controller.
if err := sync.cloudAlias.AddAlias(ctx, node.Name, cidrRange); err != nil {
if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil {
klog.Errorf("Could not add alias %v for node %q: %v", cidrRange, node.Name, err)
return err
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/controller/nodeipam/ipam/sync/sync_test.go
Expand Up @@ -58,13 +58,13 @@ type fakeAPIs struct {
results []error
}

func (f *fakeAPIs) Alias(ctx context.Context, nodeName string) (*net.IPNet, error) {
f.calls = append(f.calls, fmt.Sprintf("alias %v", nodeName))
func (f *fakeAPIs) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) {
f.calls = append(f.calls, fmt.Sprintf("alias %v", node.Name))
return f.aliasRange, f.aliasErr
}

func (f *fakeAPIs) AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error {
f.calls = append(f.calls, fmt.Sprintf("addAlias %v %v", nodeName, cidrRange))
func (f *fakeAPIs) AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error {
f.calls = append(f.calls, fmt.Sprintf("addAlias %v %v", node.Name, cidrRange))
return f.addAliasErr
}

Expand Down
98 changes: 65 additions & 33 deletions staging/src/k8s.io/legacy-cloud-providers/gce/gce.go
Expand Up @@ -99,6 +99,13 @@ type Cloud struct {
// for the cloudprovider to start watching the configmap.
ClusterID ClusterID

// initializer is used for lazy initialization of subnetworkURL
// and isLegacyNetwork fields if they are not passed via the config.
// The reason is to avoid GCE API calls to initialize them if they
// will never be used. This is especially important when
// it is run from Kubelets, as there can be thousands of them.
subnetworkURLAndIsLegacyNetworkInitializer sync.Once

service *compute.Service
serviceBeta *computebeta.Service
serviceAlpha *computealpha.Service
Expand All @@ -115,10 +122,14 @@ type Cloud struct {
// managedZones will be set to the 1 zone if running a single zone cluster
// it will be set to ALL zones in region for any multi-zone cluster
// Use GetAllCurrentZones to get only zones that contain nodes
managedZones []string
networkURL string
isLegacyNetwork bool
subnetworkURL string
managedZones []string
networkURL string
// unsafeIsLegacyNetwork should be used only via IsLegacyNetwork() accessor,
// to ensure it was properly initialized.
unsafeIsLegacyNetwork bool
// unsafeSubnetworkURL should be used only via SubnetworkURL() accessor,
// to ensure it was properly initialized.
unsafeSubnetworkURL string
secondaryRangeName string
networkProjectID string
onXPN bool
Expand Down Expand Up @@ -465,32 +476,12 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
subnetURL = config.SubnetworkURL
} else if config.SubnetworkName != "" {
subnetURL = gceSubnetworkURL(config.APIEndpoint, netProjID, config.Region, config.SubnetworkName)
} else {
// Determine the type of network and attempt to discover the correct subnet for AUTO mode.
// Gracefully fail because kubelet calls CreateGCECloud without any config, and minions
// lack the proper credentials for API calls.
if networkName := lastComponent(networkURL); networkName != "" {
var n *compute.Network
if n, err = getNetwork(service, netProjID, networkName); err != nil {
klog.Warningf("Could not retrieve network %q; err: %v", networkName, err)
} else {
switch typeOfNetwork(n) {
case netTypeLegacy:
klog.Infof("Network %q is type legacy - no subnetwork", networkName)
isLegacyNetwork = true
case netTypeCustom:
klog.Warningf("Network %q is type custom - cannot auto select a subnetwork", networkName)
case netTypeAuto:
subnetURL, err = determineSubnetURL(service, netProjID, networkName, config.Region)
if err != nil {
klog.Warningf("Could not determine subnetwork for network %q and region %v; err: %v", networkName, config.Region, err)
} else {
klog.Infof("Auto selecting subnetwork %q", subnetURL)
}
}
}
}
}
// If neither SubnetworkURL nor SubnetworkName are provided, defer to
// lazy initialization. Determining subnetURL and isLegacyNetwork requires
// GCE API call. Given that it's not used in many cases and the fact that
// the provider is initialized also for Kubelets (and there can be thousands
// of them) we defer to lazy initialization here.

if len(config.ManagedZones) == 0 {
config.ManagedZones, err = getZonesForRegion(service, config.ProjectID, config.Region)
Expand Down Expand Up @@ -518,8 +509,8 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
localZone: config.Zone,
managedZones: config.ManagedZones,
networkURL: networkURL,
isLegacyNetwork: isLegacyNetwork,
subnetworkURL: subnetURL,
unsafeIsLegacyNetwork: isLegacyNetwork,
unsafeSubnetworkURL: subnetURL,
secondaryRangeName: config.SecondaryRangeName,
nodeTags: config.NodeTags,
nodeInstancePrefix: config.NodeInstancePrefix,
Expand All @@ -542,6 +533,45 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) {
return gce, nil
}

// initializeNetworkConfig() is supposed to be called under sync.Once()
// for accessors to subnetworkURL and isLegacyNetwork fields.
func (g *Cloud) initializeSubnetworkURLAndIsLegacyNetwork() {
if g.unsafeSubnetworkURL != "" {
// This has already been initialized via the config.
return
}

var subnetURL string
var isLegacyNetwork bool

// Determine the type of network and attempt to discover the correct subnet for AUTO mode.
// Gracefully fail because kubelet calls CreateGCECloud without any config, and minions
// lack the proper credentials for API calls.
if networkName := lastComponent(g.NetworkURL()); networkName != "" {
if n, err := getNetwork(g.service, g.NetworkProjectID(), networkName); err != nil {
klog.Warningf("Could not retrieve network %q; err: %v", networkName, err)
} else {
switch typeOfNetwork(n) {
case netTypeLegacy:
klog.Infof("Network %q is type legacy - no subnetwork", networkName)
isLegacyNetwork = true
case netTypeCustom:
klog.Warningf("Network %q is type custom - cannot auto select a subnetwork", networkName)
case netTypeAuto:
subnetURL, err = determineSubnetURL(g.service, g.NetworkProjectID(), networkName, g.Region())
if err != nil {
klog.Warningf("Could not determine subnetwork for network %q and region %v; err: %v", networkName, g.Region(), err)
} else {
klog.Infof("Auto selecting subnetwork %q", subnetURL)
}
}
}
}

g.unsafeSubnetworkURL = subnetURL
g.unsafeIsLegacyNetwork = isLegacyNetwork
}

// SetRateLimiter adds a custom cloud.RateLimiter implementation.
// WARNING: Calling this could have unexpected behavior if you have in-flight
// requests. It is best to use this immediately after creating a Cloud.
Expand Down Expand Up @@ -672,12 +702,14 @@ func (g *Cloud) NetworkURL() string {

// SubnetworkURL returns the subnetwork url
func (g *Cloud) SubnetworkURL() string {
return g.subnetworkURL
g.subnetworkURLAndIsLegacyNetworkInitializer.Do(g.initializeSubnetworkURLAndIsLegacyNetwork)
return g.unsafeSubnetworkURL
}

// IsLegacyNetwork returns true if the cluster is still running a legacy network configuration.
func (g *Cloud) IsLegacyNetwork() bool {
return g.isLegacyNetwork
g.subnetworkURLAndIsLegacyNetworkInitializer.Do(g.initializeSubnetworkURLAndIsLegacyNetwork)
return g.unsafeIsLegacyNetwork
}

// SetInformers sets up the zone handlers we need watching for node changes.
Expand Down
26 changes: 13 additions & 13 deletions staging/src/k8s.io/legacy-cloud-providers/gce/gce_instances.go
Expand Up @@ -361,21 +361,20 @@ func (g *Cloud) CurrentNodeName(ctx context.Context, hostname string) (types.Nod
return types.NodeName(hostname), nil
}

// AliasRanges returns a list of CIDR ranges that are assigned to the
// AliasRangesByProviderID returns a list of CIDR ranges that are assigned to the
// `node` for allocation to pods. Returns a list of the form
// "<ip>/<netmask>".
func (g *Cloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error) {
func (g *Cloud) AliasRangesByProviderID(providerID string) (cidrs []string, err error) {
ctx, cancel := cloud.ContextWithCallTimeout()
defer cancel()

var instance *gceInstance
instance, err = g.getInstanceByName(mapNodeNameToInstanceName(nodeName))
_, zone, name, err := splitProviderID(providerID)
if err != nil {
return
return nil, err
}

var res *computebeta.Instance
res, err = g.c.BetaInstances().Get(ctx, meta.ZonalKey(instance.Name, lastComponent(instance.Zone)))
res, err = g.c.BetaInstances().Get(ctx, meta.ZonalKey(canonicalizeInstanceName(name), zone))
if err != nil {
return
}
Expand All @@ -388,28 +387,29 @@ func (g *Cloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error)
return
}

// AddAliasToInstance adds an alias to the given instance from the named
// AddAliasToInstanceByProviderID adds an alias to the given instance from the named
// secondary range.
func (g *Cloud) AddAliasToInstance(nodeName types.NodeName, alias *net.IPNet) error {
func (g *Cloud) AddAliasToInstanceByProviderID(providerID string, alias *net.IPNet) error {
ctx, cancel := cloud.ContextWithCallTimeout()
defer cancel()

v1instance, err := g.getInstanceByName(mapNodeNameToInstanceName(nodeName))
_, zone, name, err := splitProviderID(providerID)
if err != nil {
return err
}
instance, err := g.c.BetaInstances().Get(ctx, meta.ZonalKey(v1instance.Name, lastComponent(v1instance.Zone)))

instance, err := g.c.BetaInstances().Get(ctx, meta.ZonalKey(canonicalizeInstanceName(name), zone))
if err != nil {
return err
}

switch len(instance.NetworkInterfaces) {
case 0:
return fmt.Errorf("instance %q has no network interfaces", nodeName)
return fmt.Errorf("instance %q has no network interfaces", providerID)
case 1:
default:
klog.Warningf("Instance %q has more than one network interface, using only the first (%v)",
nodeName, instance.NetworkInterfaces)
providerID, instance.NetworkInterfaces)
}

iface := &computebeta.NetworkInterface{}
Expand All @@ -420,7 +420,7 @@ func (g *Cloud) AddAliasToInstance(nodeName types.NodeName, alias *net.IPNet) er
SubnetworkRangeName: g.secondaryRangeName,
})

mc := newInstancesMetricContext("add_alias", v1instance.Zone)
mc := newInstancesMetricContext("add_alias", zone)
err = g.c.BetaInstances().UpdateNetworkInterface(ctx, meta.ZonalKey(instance.Name, lastComponent(instance.Zone)), iface.Name, iface)
return mc.Observe(err)
}
Expand Down
Expand Up @@ -57,7 +57,7 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v
}
scheme := cloud.SchemeInternal
options := getILBOptions(svc)
if g.isLegacyNetwork {
if g.IsLegacyNetwork() {
g.eventRecorder.Event(svc, v1.EventTypeWarning, "ILBOptionsIgnored", "Internal LoadBalancer options are not supported with Legacy Networks.")
options = ILBOptions{}
}
Expand Down