diff --git a/pkg/controller/nodeipam/ipam/adapter.go b/pkg/controller/nodeipam/ipam/adapter.go index bfbfb9e24169..10939a0d974f 100644 --- a/pkg/controller/nodeipam/ipam/adapter.go +++ b/pkg/controller/nodeipam/ipam/adapter.go @@ -21,6 +21,7 @@ package ipam import ( "context" "encoding/json" + "fmt" "net" "k8s.io/klog" @@ -60,8 +61,12 @@ func newAdapter(k8s clientset.Interface, cloud *gce.Cloud) *adapter { return ret } -func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error) { - cidrs, err := a.cloud.AliasRanges(types.NodeName(nodeName)) +func (a *adapter) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) { + if node.Spec.ProviderID == "" { + return nil, fmt.Errorf("node %s doesn't have providerID", node.Name) + } + + cidrs, err := a.cloud.AliasRangesByProviderID(node.Spec.ProviderID) if err != nil { return nil, err } @@ -72,7 +77,7 @@ func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error case 1: break default: - klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", nodeName, cidrs) + klog.Warningf("Node %q has more than one alias assigned (%v), defaulting to the first", node.Name, cidrs) } _, cidrRange, err := net.ParseCIDR(cidrs[0]) @@ -83,8 +88,12 @@ func (a *adapter) Alias(ctx context.Context, nodeName string) (*net.IPNet, error return cidrRange, nil } -func (a *adapter) AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error { - return a.cloud.AddAliasToInstance(types.NodeName(nodeName), cidrRange) +func (a *adapter) AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error { + if node.Spec.ProviderID == "" { + return fmt.Errorf("node %s doesn't have providerID", node.Name) + } + + return a.cloud.AddAliasToInstanceByProviderID(node.Spec.ProviderID, cidrRange) } func (a *adapter) Node(ctx context.Context, name string) (*v1.Node, error) { diff --git a/pkg/controller/nodeipam/ipam/cloud_cidr_allocator.go b/pkg/controller/nodeipam/ipam/cloud_cidr_allocator.go index 19adb41026c4..63975dc76c10 100644 --- a/pkg/controller/nodeipam/ipam/cloud_cidr_allocator.go +++ b/pkg/controller/nodeipam/ipam/cloud_cidr_allocator.go @@ -249,8 +249,11 @@ func (ca *cloudCIDRAllocator) updateCIDRAllocation(nodeName string) error { klog.Errorf("Failed while getting node %v for updating Node.Spec.PodCIDR: %v", nodeName, err) return err } + if node.Spec.ProviderID == "" { + return fmt.Errorf("node %s doesn't have providerID", nodeName) + } - cidrs, err := ca.cloud.AliasRanges(types.NodeName(nodeName)) + cidrs, err := ca.cloud.AliasRangesByProviderID(node.Spec.ProviderID) if err != nil { nodeutil.RecordNodeStatusChange(ca.recorder, node, "CIDRNotAvailable") return fmt.Errorf("failed to allocate cidr: %v", err) diff --git a/pkg/controller/nodeipam/ipam/sync/sync.go b/pkg/controller/nodeipam/ipam/sync/sync.go index ee95392b8ff9..733f76c592cc 100644 --- a/pkg/controller/nodeipam/ipam/sync/sync.go +++ b/pkg/controller/nodeipam/ipam/sync/sync.go @@ -43,9 +43,9 @@ const ( // cloudAlias is the interface to the cloud platform APIs. type cloudAlias interface { // Alias returns the IP alias for the node. - Alias(ctx context.Context, nodeName string) (*net.IPNet, error) + Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) // AddAlias adds an alias to the node. - AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error + AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error } // kubeAPI is the interface to the Kubernetes APIs. @@ -204,7 +204,7 @@ func (op *updateOp) run(sync *NodeSync) error { op.node = node } - aliasRange, err := sync.cloudAlias.Alias(ctx, sync.nodeName) + aliasRange, err := sync.cloudAlias.Alias(ctx, op.node) if err != nil { klog.Errorf("Error getting cloud alias for node %q: %v", sync.nodeName, err) return err @@ -293,7 +293,7 @@ func (op *updateOp) updateAliasFromNode(ctx context.Context, sync *NodeSync, nod return err } - if err := sync.cloudAlias.AddAlias(ctx, node.Name, aliasRange); err != nil { + if err := sync.cloudAlias.AddAlias(ctx, node, aliasRange); err != nil { klog.Errorf("Could not add alias %v for node %q: %v", aliasRange, node.Name, err) return err } @@ -325,7 +325,7 @@ func (op *updateOp) allocateRange(ctx context.Context, sync *NodeSync, node *v1. // If addAlias returns a hard error, cidrRange will be leaked as there // is no durable record of the range. The missing space will be // recovered on the next restart of the controller. - if err := sync.cloudAlias.AddAlias(ctx, node.Name, cidrRange); err != nil { + if err := sync.cloudAlias.AddAlias(ctx, node, cidrRange); err != nil { klog.Errorf("Could not add alias %v for node %q: %v", cidrRange, node.Name, err) return err } diff --git a/pkg/controller/nodeipam/ipam/sync/sync_test.go b/pkg/controller/nodeipam/ipam/sync/sync_test.go index 8c80b2c6453b..a10da6e2f458 100644 --- a/pkg/controller/nodeipam/ipam/sync/sync_test.go +++ b/pkg/controller/nodeipam/ipam/sync/sync_test.go @@ -58,13 +58,13 @@ type fakeAPIs struct { results []error } -func (f *fakeAPIs) Alias(ctx context.Context, nodeName string) (*net.IPNet, error) { - f.calls = append(f.calls, fmt.Sprintf("alias %v", nodeName)) +func (f *fakeAPIs) Alias(ctx context.Context, node *v1.Node) (*net.IPNet, error) { + f.calls = append(f.calls, fmt.Sprintf("alias %v", node.Name)) return f.aliasRange, f.aliasErr } -func (f *fakeAPIs) AddAlias(ctx context.Context, nodeName string, cidrRange *net.IPNet) error { - f.calls = append(f.calls, fmt.Sprintf("addAlias %v %v", nodeName, cidrRange)) +func (f *fakeAPIs) AddAlias(ctx context.Context, node *v1.Node, cidrRange *net.IPNet) error { + f.calls = append(f.calls, fmt.Sprintf("addAlias %v %v", node.Name, cidrRange)) return f.addAliasErr } diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce.go index c4c33e446ba1..cae825c12038 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/gce.go +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce.go @@ -99,6 +99,13 @@ type Cloud struct { // for the cloudprovider to start watching the configmap. ClusterID ClusterID + // initializer is used for lazy initialization of subnetworkURL + // and isLegacyNetwork fields if they are not passed via the config. + // The reason is to avoid GCE API calls to initialize them if they + // will never be used. This is especially important when + // it is run from Kubelets, as there can be thousands of them. + subnetworkURLAndIsLegacyNetworkInitializer sync.Once + service *compute.Service serviceBeta *computebeta.Service serviceAlpha *computealpha.Service @@ -115,10 +122,14 @@ type Cloud struct { // managedZones will be set to the 1 zone if running a single zone cluster // it will be set to ALL zones in region for any multi-zone cluster // Use GetAllCurrentZones to get only zones that contain nodes - managedZones []string - networkURL string - isLegacyNetwork bool - subnetworkURL string + managedZones []string + networkURL string + // unsafeIsLegacyNetwork should be used only via IsLegacyNetwork() accessor, + // to ensure it was properly initialized. + unsafeIsLegacyNetwork bool + // unsafeSubnetworkURL should be used only via SubnetworkURL() accessor, + // to ensure it was properly initialized. + unsafeSubnetworkURL string secondaryRangeName string networkProjectID string onXPN bool @@ -465,32 +476,12 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) { subnetURL = config.SubnetworkURL } else if config.SubnetworkName != "" { subnetURL = gceSubnetworkURL(config.APIEndpoint, netProjID, config.Region, config.SubnetworkName) - } else { - // Determine the type of network and attempt to discover the correct subnet for AUTO mode. - // Gracefully fail because kubelet calls CreateGCECloud without any config, and minions - // lack the proper credentials for API calls. - if networkName := lastComponent(networkURL); networkName != "" { - var n *compute.Network - if n, err = getNetwork(service, netProjID, networkName); err != nil { - klog.Warningf("Could not retrieve network %q; err: %v", networkName, err) - } else { - switch typeOfNetwork(n) { - case netTypeLegacy: - klog.Infof("Network %q is type legacy - no subnetwork", networkName) - isLegacyNetwork = true - case netTypeCustom: - klog.Warningf("Network %q is type custom - cannot auto select a subnetwork", networkName) - case netTypeAuto: - subnetURL, err = determineSubnetURL(service, netProjID, networkName, config.Region) - if err != nil { - klog.Warningf("Could not determine subnetwork for network %q and region %v; err: %v", networkName, config.Region, err) - } else { - klog.Infof("Auto selecting subnetwork %q", subnetURL) - } - } - } - } } + // If neither SubnetworkURL nor SubnetworkName are provided, defer to + // lazy initialization. Determining subnetURL and isLegacyNetwork requires + // GCE API call. Given that it's not used in many cases and the fact that + // the provider is initialized also for Kubelets (and there can be thousands + // of them) we defer to lazy initialization here. if len(config.ManagedZones) == 0 { config.ManagedZones, err = getZonesForRegion(service, config.ProjectID, config.Region) @@ -518,8 +509,8 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) { localZone: config.Zone, managedZones: config.ManagedZones, networkURL: networkURL, - isLegacyNetwork: isLegacyNetwork, - subnetworkURL: subnetURL, + unsafeIsLegacyNetwork: isLegacyNetwork, + unsafeSubnetworkURL: subnetURL, secondaryRangeName: config.SecondaryRangeName, nodeTags: config.NodeTags, nodeInstancePrefix: config.NodeInstancePrefix, @@ -542,6 +533,45 @@ func CreateGCECloud(config *CloudConfig) (*Cloud, error) { return gce, nil } +// initializeNetworkConfig() is supposed to be called under sync.Once() +// for accessors to subnetworkURL and isLegacyNetwork fields. +func (g *Cloud) initializeSubnetworkURLAndIsLegacyNetwork() { + if g.unsafeSubnetworkURL != "" { + // This has already been initialized via the config. + return + } + + var subnetURL string + var isLegacyNetwork bool + + // Determine the type of network and attempt to discover the correct subnet for AUTO mode. + // Gracefully fail because kubelet calls CreateGCECloud without any config, and minions + // lack the proper credentials for API calls. + if networkName := lastComponent(g.NetworkURL()); networkName != "" { + if n, err := getNetwork(g.service, g.NetworkProjectID(), networkName); err != nil { + klog.Warningf("Could not retrieve network %q; err: %v", networkName, err) + } else { + switch typeOfNetwork(n) { + case netTypeLegacy: + klog.Infof("Network %q is type legacy - no subnetwork", networkName) + isLegacyNetwork = true + case netTypeCustom: + klog.Warningf("Network %q is type custom - cannot auto select a subnetwork", networkName) + case netTypeAuto: + subnetURL, err = determineSubnetURL(g.service, g.NetworkProjectID(), networkName, g.Region()) + if err != nil { + klog.Warningf("Could not determine subnetwork for network %q and region %v; err: %v", networkName, g.Region(), err) + } else { + klog.Infof("Auto selecting subnetwork %q", subnetURL) + } + } + } + } + + g.unsafeSubnetworkURL = subnetURL + g.unsafeIsLegacyNetwork = isLegacyNetwork +} + // SetRateLimiter adds a custom cloud.RateLimiter implementation. // WARNING: Calling this could have unexpected behavior if you have in-flight // requests. It is best to use this immediately after creating a Cloud. @@ -672,12 +702,14 @@ func (g *Cloud) NetworkURL() string { // SubnetworkURL returns the subnetwork url func (g *Cloud) SubnetworkURL() string { - return g.subnetworkURL + g.subnetworkURLAndIsLegacyNetworkInitializer.Do(g.initializeSubnetworkURLAndIsLegacyNetwork) + return g.unsafeSubnetworkURL } // IsLegacyNetwork returns true if the cluster is still running a legacy network configuration. func (g *Cloud) IsLegacyNetwork() bool { - return g.isLegacyNetwork + g.subnetworkURLAndIsLegacyNetworkInitializer.Do(g.initializeSubnetworkURLAndIsLegacyNetwork) + return g.unsafeIsLegacyNetwork } // SetInformers sets up the zone handlers we need watching for node changes. diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_instances.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_instances.go index 15d7b6f2e3f7..f0881f7f0977 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_instances.go +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_instances.go @@ -361,21 +361,20 @@ func (g *Cloud) CurrentNodeName(ctx context.Context, hostname string) (types.Nod return types.NodeName(hostname), nil } -// AliasRanges returns a list of CIDR ranges that are assigned to the +// AliasRangesByProviderID returns a list of CIDR ranges that are assigned to the // `node` for allocation to pods. Returns a list of the form // "/". -func (g *Cloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error) { +func (g *Cloud) AliasRangesByProviderID(providerID string) (cidrs []string, err error) { ctx, cancel := cloud.ContextWithCallTimeout() defer cancel() - var instance *gceInstance - instance, err = g.getInstanceByName(mapNodeNameToInstanceName(nodeName)) + _, zone, name, err := splitProviderID(providerID) if err != nil { - return + return nil, err } var res *computebeta.Instance - res, err = g.c.BetaInstances().Get(ctx, meta.ZonalKey(instance.Name, lastComponent(instance.Zone))) + res, err = g.c.BetaInstances().Get(ctx, meta.ZonalKey(canonicalizeInstanceName(name), zone)) if err != nil { return } @@ -388,28 +387,29 @@ func (g *Cloud) AliasRanges(nodeName types.NodeName) (cidrs []string, err error) return } -// AddAliasToInstance adds an alias to the given instance from the named +// AddAliasToInstanceByProviderID adds an alias to the given instance from the named // secondary range. -func (g *Cloud) AddAliasToInstance(nodeName types.NodeName, alias *net.IPNet) error { +func (g *Cloud) AddAliasToInstanceByProviderID(providerID string, alias *net.IPNet) error { ctx, cancel := cloud.ContextWithCallTimeout() defer cancel() - v1instance, err := g.getInstanceByName(mapNodeNameToInstanceName(nodeName)) + _, zone, name, err := splitProviderID(providerID) if err != nil { return err } - instance, err := g.c.BetaInstances().Get(ctx, meta.ZonalKey(v1instance.Name, lastComponent(v1instance.Zone))) + + instance, err := g.c.BetaInstances().Get(ctx, meta.ZonalKey(canonicalizeInstanceName(name), zone)) if err != nil { return err } switch len(instance.NetworkInterfaces) { case 0: - return fmt.Errorf("instance %q has no network interfaces", nodeName) + return fmt.Errorf("instance %q has no network interfaces", providerID) case 1: default: klog.Warningf("Instance %q has more than one network interface, using only the first (%v)", - nodeName, instance.NetworkInterfaces) + providerID, instance.NetworkInterfaces) } iface := &computebeta.NetworkInterface{} @@ -420,7 +420,7 @@ func (g *Cloud) AddAliasToInstance(nodeName types.NodeName, alias *net.IPNet) er SubnetworkRangeName: g.secondaryRangeName, }) - mc := newInstancesMetricContext("add_alias", v1instance.Zone) + mc := newInstancesMetricContext("add_alias", zone) err = g.c.BetaInstances().UpdateNetworkInterface(ctx, meta.ZonalKey(instance.Name, lastComponent(instance.Zone)), iface.Name, iface) return mc.Observe(err) } diff --git a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go index 30546d324092..721a89c198b4 100644 --- a/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go +++ b/staging/src/k8s.io/legacy-cloud-providers/gce/gce_loadbalancer_internal.go @@ -57,7 +57,7 @@ func (g *Cloud) ensureInternalLoadBalancer(clusterName, clusterID string, svc *v } scheme := cloud.SchemeInternal options := getILBOptions(svc) - if g.isLegacyNetwork { + if g.IsLegacyNetwork() { g.eventRecorder.Event(svc, v1.EventTypeWarning, "ILBOptionsIgnored", "Internal LoadBalancer options are not supported with Legacy Networks.") options = ILBOptions{} }