Reuse instance-groups for internal load balancers
Based on the docs for internal load balancers [1], backend services [2],
and instances in instance groups [3], the following restrictions apply:

- An internal LB can load balance to VMs in the same region, even when
  they are in different subnets
- Instance groups used by the backend service must contain instances
  from the same subnet
- An instance can only belong to one load-balanced instance group

Having cluster nodes in more than one subnet is a plausible use case,
but the current setup fails to create an internal load balancer when
the nodes span multiple subnets.

This change finds pre-existing instance groups that contain ONLY
instances belonging to the cluster and uses them for the backend
service; instance groups are then ensured only for the remaining
instances.
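
As a minimal sketch of that reuse predicate (illustrative names only,
not the provider's actual API):

package main

import "fmt"

// reusable reports whether a pre-existing instance group may back the
// ILB: every instance in the group must be a node of this cluster.
func reusable(group, nodes []string) bool {
	set := make(map[string]bool, len(nodes))
	for _, n := range nodes {
		set[n] = true
	}
	for _, in := range group {
		if !set[in] {
			return false // the group contains a foreign VM
		}
	}
	return true
}

func main() {
	nodes := []string{"test-node-1", "test-node-2"}
	fmt.Println(reusable([]string{"test-node-1"}, nodes)) // true: reuse the group
	fmt.Println(reusable([]string{"other-vm"}, nodes))    // false: ensure our own group
}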

[1] https://cloud.google.com/load-balancing/docs/internal
[2] https://cloud.google.com/load-balancing/docs/backend-service#restrictions_and_guidance
[3] https://cloud.google.com/compute/docs/instance-groups/creating-groups-of-unmanaged-instances#addinstances

Co-authored-by: Abhinav Dahiya <abhinav.dahiya@redhat.com>
Fedosin and abhinavdahiya committed Jan 28, 2022
1 parent d27c376 commit 9f6ed06
Showing 5 changed files with 158 additions and 28 deletions.
21 changes: 14 additions & 7 deletions providers/gce/gce.go
@@ -176,6 +176,8 @@ type Cloud struct {
// stackType indicates whether the cluster is a single stack IPv4, single
// stack IPv6 or a dual stack cluster
stackType StackType

externalInstanceGroupsPrefix string // If non-empty, instance groups with this prefix are candidates for reuse by the ILB.
}

// ConfigGlobal is the in memory representation of the gce.conf config data
@@ -213,6 +215,9 @@ type ConfigGlobal struct {
// Default to none.
// For example: MyFeatureFlag
AlphaFeatures []string `gcfg:"alpha-features"`
// ExternalInstanceGroupsPrefix, when non-empty, is used to select pre-existing
// instance groups whose names start with the prefix and include them as
// backends for the ILB.
ExternalInstanceGroupsPrefix string `gcfg:"external-instance-groups-prefix"`
}

// ConfigFile is the struct used to parse the /etc/gce.conf configuration file.
@@ -240,13 +245,14 @@ type CloudConfig struct {
SubnetworkName string
SubnetworkURL string
// DEPRECATED: Do not rely on this value as it may be incorrect.
SecondaryRangeName string
NodeTags []string
NodeInstancePrefix string
TokenSource oauth2.TokenSource
UseMetadataServer bool
AlphaFeatureGate *AlphaFeatureGate
StackType string
SecondaryRangeName string
NodeTags []string
NodeInstancePrefix string
TokenSource oauth2.TokenSource
UseMetadataServer bool
AlphaFeatureGate *AlphaFeatureGate
StackType string
ExternalInstanceGroupsPrefix string
}

func init() {
@@ -336,6 +342,7 @@ func generateCloudConfig(configFile *ConfigFile) (cloudConfig *CloudConfig, err

cloudConfig.NodeTags = configFile.Global.NodeTags
cloudConfig.NodeInstancePrefix = configFile.Global.NodeInstancePrefix
cloudConfig.ExternalInstanceGroupsPrefix = configFile.Global.ExternalInstanceGroupsPrefix
cloudConfig.AlphaFeatureGate = NewAlphaFeatureGate(configFile.Global.AlphaFeatures)
}

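
With the new field plumbed through, an operator would set something like
the following in /etc/gce.conf (a hypothetical example; the prefix value
is illustrative):

[global]
external-instance-groups-prefix = pre-existing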
2 changes: 1 addition & 1 deletion providers/gce/gce_fake.go
@@ -72,7 +72,7 @@ func NewFakeGCECloud(vals TestClusterValues) *Cloud {
gce := &Cloud{
region: vals.Region,
service: service,
managedZones: []string{vals.ZoneName},
managedZones: []string{vals.ZoneName, vals.SecondaryZoneName},
projectID: vals.ProjectID,
networkProjectID: vals.ProjectID,
ClusterID: fakeClusterID(vals.ClusterID),
18 changes: 18 additions & 0 deletions providers/gce/gce_instancegroup.go
@@ -20,6 +20,8 @@ limitations under the License.
package gce

import (
"fmt"

compute "google.golang.org/api/compute/v1"

"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud"
@@ -61,6 +63,22 @@ func (g *Cloud) ListInstanceGroups(zone string) ([]*compute.InstanceGroup, error
return v, mc.Observe(err)
}

// ListInstanceGroupsWithPrefix lists all InstanceGroups in the project and
// zone whose names start with the given prefix.
// When the prefix is empty it lists all instance groups.
func (g *Cloud) ListInstanceGroupsWithPrefix(zone string, prefix string) ([]*compute.InstanceGroup, error) {
ctx, cancel := cloud.ContextWithCallTimeout()
defer cancel()

mc := newInstanceGroupMetricContext("list", zone)
f := filter.None
if prefix != "" {
f = filter.Regexp("name", fmt.Sprintf("%s.*", prefix))
}
v, err := g.c.InstanceGroups().List(ctx, zone, f)
return v, mc.Observe(err)
}

// ListInstancesInInstanceGroup lists all the instances in a given
// instance group and state.
func (g *Cloud) ListInstancesInInstanceGroup(name string, zone string, state string) ([]*compute.InstanceWithNamedPorts, error) {
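
A hypothetical call site for the new lister, assuming zone
"us-central1-a" and prefix "pre-existing"; the filter regexp is
"<prefix>.*", so only groups whose names start with the prefix are
returned, and an empty prefix lists every group:

// List candidate groups for reuse in one zone (values are illustrative).
igs, err := g.ListInstanceGroupsWithPrefix("us-central1-a", "pre-existing")
if err != nil {
	// handle the API error
}
_ = igs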
76 changes: 56 additions & 20 deletions providers/gce/gce_loadbalancer_internal.go
@@ -543,30 +543,20 @@ func (g *Cloud) ensureInternalHealthCheck(name string, svcName types.NamespacedN
return hc, nil
}

func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node) (string, error) {
func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []string) (string, error) {
klog.V(2).Infof("ensureInternalInstanceGroup(%v, %v): checking that the group contains %v nodes", name, zone, len(nodes))
ig, err := g.GetInstanceGroup(name, zone)
if err != nil && !isNotFound(err) {
return "", err
}

kubeNodes := sets.NewString()
for _, n := range nodes {
kubeNodes.Insert(n.Name)
}

// An individual InstanceGroup is limited to 1000 instances, so it is
// not possible to add more than that. Given that the long-term fix
// (AlphaFeatureILBSubsets) is already in progress, to stop the bleeding
// we simply cut the contents down to the first 1000 instances in
// alphabetical order. Since ILB is limited to 250 backend VMs anyway,
// this does not make things worse.
if len(kubeNodes) > maxInstancesPerInstanceGroup {
gceNodes := sets.NewString(nodes...)
if len(gceNodes) > maxInstancesPerInstanceGroup {
klog.Warningf("Limiting number of VMs for InstanceGroup %s to %d", name, maxInstancesPerInstanceGroup)
kubeNodes = sets.NewString(kubeNodes.List()[:maxInstancesPerInstanceGroup]...)
gceNodes = sets.NewString(gceNodes.List()[:maxInstancesPerInstanceGroup]...)
}

gceNodes := sets.NewString()
igNodes := sets.NewString()
if ig == nil {
klog.V(2).Infof("ensureInternalInstanceGroup(%v, %v): creating instance group", name, zone)
newIG := &compute.InstanceGroup{Name: name}
@@ -586,12 +576,12 @@ func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node)

for _, ins := range instances {
parts := strings.Split(ins.Instance, "/")
gceNodes.Insert(parts[len(parts)-1])
igNodes.Insert(parts[len(parts)-1])
}
}

removeNodes := gceNodes.Difference(kubeNodes).List()
addNodes := kubeNodes.Difference(gceNodes).List()
removeNodes := igNodes.Difference(gceNodes).List()
addNodes := gceNodes.Difference(igNodes).List()

if len(removeNodes) != 0 {
klog.V(2).Infof("ensureInternalInstanceGroup(%v, %v): removing nodes: %v", name, zone, removeNodes)
@@ -618,9 +608,48 @@ func (g *Cloud) ensureInternalInstanceGroup(name, zone string, nodes []*v1.Node)
func (g *Cloud) ensureInternalInstanceGroups(name string, nodes []*v1.Node) ([]string, error) {
zonedNodes := splitNodesByZone(nodes)
klog.V(2).Infof("ensureInternalInstanceGroups(%v): %d nodes over %d zones in region %v", name, len(nodes), len(zonedNodes), g.region)

var igLinks []string
for zone, nodes := range zonedNodes {
igLink, err := g.ensureInternalInstanceGroup(name, zone, nodes)
gceZonedNodes := map[string][]string{}
for zone, zNodes := range zonedNodes {
hosts, err := g.getFoundInstanceByNames(nodeNames(zNodes))
if err != nil {
return nil, err
}
names := sets.NewString()
for _, h := range hosts {
names.Insert(h.Name)
}
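// skip collects instances already covered by a reused pre-existing group;
// they are excluded from the cluster-managed group ensured below.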
skip := sets.NewString()

igs, err := g.candidateExternalInstanceGroups(zone)
if err != nil {
return nil, err
}
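// Reuse a pre-existing group only when every instance in it belongs to
// this cluster; a group containing foreign VMs must not back the ILB.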
for _, ig := range igs {
if strings.EqualFold(ig.Name, name) {
continue
}
instances, err := g.ListInstancesInInstanceGroup(ig.Name, zone, allInstances)
if err != nil {
return nil, err
}
groupInstances := sets.NewString()
for _, ins := range instances {
parts := strings.Split(ins.Instance, "/")
groupInstances.Insert(parts[len(parts)-1])
}
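// The group is reusable only if its instances are a subset of this
// cluster's nodes in the zone.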
if names.HasAll(groupInstances.UnsortedList()...) {
igLinks = append(igLinks, ig.SelfLink)
skip.Insert(groupInstances.UnsortedList()...)
}
}
if remaining := names.Difference(skip).UnsortedList(); len(remaining) > 0 {
gceZonedNodes[zone] = remaining
}
}
for zone, gceNodes := range gceZonedNodes {
igLink, err := g.ensureInternalInstanceGroup(name, zone, gceNodes)
if err != nil {
return []string{}, err
}
@@ -630,6 +659,13 @@ func (g *Cloud) ensureInternalInstanceGroups(name string, nodes []*v1.Node) ([]s
return igLinks, nil
}

// candidateExternalInstanceGroups returns the pre-existing instance groups
// matching the configured prefix. With no prefix configured it returns
// (nil, nil), preserving the previous behavior of managing only the
// cluster's own per-zone group.
func (g *Cloud) candidateExternalInstanceGroups(zone string) ([]*compute.InstanceGroup, error) {
if g.externalInstanceGroupsPrefix == "" {
return nil, nil
}
return g.ListInstanceGroupsWithPrefix(zone, g.externalInstanceGroupsPrefix)
}

func (g *Cloud) ensureInternalInstanceGroupsDeleted(name string) error {
// List of nodes isn't available here - fetch all zones in region and try deleting this cluster's ig
zones, err := g.ListZonesInRegion(g.region)
69 changes: 69 additions & 0 deletions providers/gce/gce_loadbalancer_internal_test.go
@@ -808,6 +808,75 @@ func TestEnsureLoadBalancerDeletedSucceedsOnXPN(t *testing.T) {
checkEvent(t, recorder, FilewallChangeMsg, true)
}

func TestEnsureInternalInstanceGroupsReuseGroups(t *testing.T) {
vals := DefaultTestClusterValues()
gce, err := fakeGCECloud(vals)
require.NoError(t, err)
gce.externalInstanceGroupsPrefix = "pre-existing"

igName := makeInstanceGroupName(vals.ClusterID)
nodesA, err := createAndInsertNodes(gce, []string{"test-node-1", "test-node-2"}, vals.ZoneName)
require.NoError(t, err)
nodesB, err := createAndInsertNodes(gce, []string{"test-node-3"}, vals.SecondaryZoneName)
require.NoError(t, err)

preIGName := "pre-existing-ig"
err = gce.CreateInstanceGroup(&compute.InstanceGroup{Name: preIGName}, vals.ZoneName)
require.NoError(t, err)
err = gce.CreateInstanceGroup(&compute.InstanceGroup{Name: preIGName}, vals.SecondaryZoneName)
require.NoError(t, err)
err = gce.AddInstancesToInstanceGroup(preIGName, vals.ZoneName, gce.ToInstanceReferences(vals.ZoneName, []string{"test-node-1"}))
require.NoError(t, err)
err = gce.AddInstancesToInstanceGroup(preIGName, vals.SecondaryZoneName, gce.ToInstanceReferences(vals.SecondaryZoneName, []string{"test-node-3"}))
require.NoError(t, err)

anotherPreIGName := "another-existing-ig"
err = gce.CreateInstanceGroup(&compute.InstanceGroup{Name: anotherPreIGName}, vals.ZoneName)
require.NoError(t, err)
err = gce.AddInstancesToInstanceGroup(anotherPreIGName, vals.ZoneName, gce.ToInstanceReferences(vals.ZoneName, []string{"test-node-2"}))
require.NoError(t, err)

svc := fakeLoadbalancerService(string(LBTypeInternal))
svc, err = gce.client.CoreV1().Services(svc.Namespace).Create(context.TODO(), svc, metav1.CreateOptions{})
assert.NoError(t, err)
_, err = gce.ensureInternalLoadBalancer(
vals.ClusterName, vals.ClusterID,
svc,
nil,
append(nodesA, nodesB...),
)
assert.NoError(t, err)

backendServiceName := makeBackendServiceName(gce.GetLoadBalancerName(context.TODO(), "", svc), vals.ClusterID, shareBackendService(svc), cloud.SchemeInternal, "TCP", svc.Spec.SessionAffinity)
bs, err := gce.GetRegionBackendService(backendServiceName, gce.region)
require.NoError(t, err)
assert.Equal(t, 3, len(bs.Backends), "Want three backends referencing three instance groups")

igRef := func(zone, name string) string {
return fmt.Sprintf("zones/%s/instanceGroups/%s", zone, name)
}
for _, name := range []string{igRef(vals.ZoneName, preIGName), igRef(vals.SecondaryZoneName, preIGName), igRef(vals.ZoneName, igName)} {
var found bool
for _, be := range bs.Backends {
if strings.Contains(be.Group, name) {
found = true
break
}
}
assert.True(t, found, "Expected list of backends to have group %q", name)
}

// Expect initial zone to have test-node-2
instances, err := gce.ListInstancesInInstanceGroup(igName, vals.ZoneName, "ALL")
require.NoError(t, err)
assert.Equal(t, 1, len(instances))
assert.Contains(
t,
instances[0].Instance,
fmt.Sprintf("%s/zones/%s/instances/%s", vals.ProjectID, vals.ZoneName, "test-node-2"),
)
}

func TestEnsureInternalInstanceGroupsDeleted(t *testing.T) {
vals := DefaultTestClusterValues()
gce, err := fakeGCECloud(vals)
