Prevent stranding of partial load balancers resources #7852

Merged: 3 commits, May 22, 2015
9 changes: 7 additions & 2 deletions pkg/cloudprovider/cloud.go
@@ -66,8 +66,13 @@ type TCPLoadBalancer interface {
CreateTCPLoadBalancer(name, region string, externalIP net.IP, ports []int, hosts []string, affinityType api.AffinityType) (string, error)
// UpdateTCPLoadBalancer updates hosts under the specified load balancer.
UpdateTCPLoadBalancer(name, region string, hosts []string) error
// DeleteTCPLoadBalancer deletes a specified load balancer.
DeleteTCPLoadBalancer(name, region string) error
// EnsureTCPLoadBalancerDeleted deletes the specified load balancer if it
// exists, returning nil if the load balancer specified either didn't exist or
// was successfully deleted.
// This construction is useful because many cloud providers' load balancers
// have multiple underlying components, meaning a Get could say that the LB
// doesn't exist even if some part of it is still lying around.
EnsureTCPLoadBalancerDeleted(name, region string) error
}

// Instances is an abstract, pluggable interface for sets of instances.
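For callers, the practical consequence of the new contract is that deletion can be retried blindly: a repeat call is safe whether the previous attempt removed none, some, or all of the load balancer's components. Below is a minimal sketch of that usage, not part of this PR; the helper name and backoff are illustrative, and the import path is assumed from the pkg/cloudprovider path shown in this diff.

package example

import (
	"time"

	"github.com/GoogleCloudPlatform/kubernetes/pkg/cloudprovider"
)

// retryEnsureDeleted leans on the idempotent contract above: because
// EnsureTCPLoadBalancerDeleted returns nil when the load balancer (or any
// leftover piece of it) is already gone, every retry is safe and nothing is
// stranded once the call finally succeeds.
func retryEnsureDeleted(lb cloudprovider.TCPLoadBalancer, name, region string, attempts int) error {
	var err error
	for i := 0; i < attempts; i++ {
		if err = lb.EnsureTCPLoadBalancerDeleted(name, region); err == nil {
			return nil // fully deleted, or never existed
		}
		time.Sleep(time.Duration(i+1) * time.Second) // simple linear backoff between attempts
	}
	return err
}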
4 changes: 2 additions & 2 deletions pkg/cloudprovider/fake/fake.go
@@ -115,9 +115,9 @@ func (f *FakeCloud) UpdateTCPLoadBalancer(name, region string, hosts []string) e
return f.Err
}

// DeleteTCPLoadBalancer is a test-spy implementation of TCPLoadBalancer.DeleteTCPLoadBalancer.
// EnsureTCPLoadBalancerDeleted is a test-spy implementation of TCPLoadBalancer.EnsureTCPLoadBalancerDeleted.
// It adds an entry "delete" into the internal method call record.
func (f *FakeCloud) DeleteTCPLoadBalancer(name, region string) error {
func (f *FakeCloud) EnsureTCPLoadBalancerDeleted(name, region string) error {
f.addCall("delete")
return f.Err
}
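As context for the rename, here is a minimal sketch of the test-spy pattern the fake follows. It is an illustrative stand-in, not the real FakeCloud struct; the type and field names are hypothetical.

package example

import "testing"

// spyBalancer mimics the spirit of FakeCloud for this one method: it records
// that a delete was attempted and returns a canned error, so tests can drive
// both the success and the retry paths of a caller.
type spyBalancer struct {
	calls []string
	err   error
}

func (s *spyBalancer) EnsureTCPLoadBalancerDeleted(name, region string) error {
	s.calls = append(s.calls, "delete") // mirrors FakeCloud's addCall("delete")
	return s.err
}

func TestEnsureDeletedIsRecorded(t *testing.T) {
	spy := &spyBalancer{}
	if err := spy.EnsureTCPLoadBalancerDeleted("lb-name", "region"); err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(spy.calls) != 1 || spy.calls[0] != "delete" {
		t.Errorf("expected a single recorded delete call, got %v", spy.calls)
	}
}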
5 changes: 3 additions & 2 deletions pkg/cloudprovider/gce/gce.go
@@ -399,8 +399,8 @@ func (gce *GCECloud) UpdateTCPLoadBalancer(name, region string, hosts []string)
return nil
}

// DeleteTCPLoadBalancer is an implementation of TCPLoadBalancer.DeleteTCPLoadBalancer.
func (gce *GCECloud) DeleteTCPLoadBalancer(name, region string) error {
// EnsureTCPLoadBalancerDeleted is an implementation of TCPLoadBalancer.EnsureTCPLoadBalancerDeleted.
func (gce *GCECloud) EnsureTCPLoadBalancerDeleted(name, region string) error {
op, err := gce.service.ForwardingRules.Delete(gce.projectID, region, name).Do()
if err != nil && isHTTPErrorCode(err, http.StatusNotFound) {
glog.Infof("Forwarding rule %s already deleted. Continuing to delete target pool.", name)
@@ -411,6 +411,7 @@ func (gce *GCECloud) DeleteTCPLoadBalancer(name, region string) error {
err = gce.waitForRegionOp(op, region)
if err != nil {
glog.Warningf("Failed waiting for Forwarding Rule %s to be deleted: got error %s.", name, err.Error())
return err
}
}
op, err = gce.service.TargetPools.Delete(gce.projectID, region, name).Do()
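The GCE hunk above applies the same rule to each component: treat a 404 as "already deleted" and keep going, but surface any other error so the whole deletion is retried later. The sketch below shows that rule in isolation; the helper is hypothetical, and the not-found check is passed in as a predicate rather than relying on gce.go's unexported isHTTPErrorCode.

package example

import "github.com/golang/glog"

// deleteIgnoringNotFound runs one component's delete call. A "not found"
// result means the component is already gone, which counts as success for an
// idempotent delete; any other error is returned so the caller can retry the
// whole EnsureTCPLoadBalancerDeleted operation later.
func deleteIgnoringNotFound(deleteFn func() error, isNotFound func(error) bool, component, name string) error {
	err := deleteFn()
	if err == nil {
		return nil
	}
	if isNotFound(err) { // e.g. isHTTPErrorCode(err, http.StatusNotFound) in gce.go
		glog.Infof("%s %s already deleted; continuing.", component, name)
		return nil
	}
	return err
}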
53 changes: 36 additions & 17 deletions pkg/cloudprovider/openstack/openstack.go
@@ -21,6 +21,7 @@ import (
"fmt"
"io"
"net"
"net/http"
"regexp"
"time"

@@ -630,31 +631,49 @@ func (lb *LoadBalancer) UpdateTCPLoadBalancer(name, region string, hosts []strin
return nil
}

func (lb *LoadBalancer) DeleteTCPLoadBalancer(name, region string) error {
glog.V(4).Infof("DeleteTCPLoadBalancer(%v, %v)", name, region)

vip, err := getVipByName(lb.network, name)
if err != nil {
return err
}

pool, err := pools.Get(lb.network, vip.PoolID).Extract()
if err != nil {
return err
func (lb *LoadBalancer) EnsureTCPLoadBalancerDeleted(name, region string) error {
glog.V(4).Infof("EnsureTCPLoadBalancerDeleted(%v, %v)", name, region)

// TODO(#8352): Because we look up the pool using the VIP object, if the VIP
// is already gone we can't attempt to delete the pool. We should instead
// continue even if the VIP doesn't exist and attempt to delete the pool by
// name.
vip, vipErr := getVipByName(lb.network, name)
if vipErr == ErrNotFound {
return nil
} else if vipErr != nil {
return vipErr
}

// It's ok if the pool doesn't exist, as we may still need to delete the vip
// (although I don't believe the system should ever be in that state).
pool, poolErr := pools.Get(lb.network, vip.PoolID).Extract()
Review comment (Member):
If the above returned ErrNotFound, then vip.PoolID will be undefined here.

if poolErr != nil {
detailedErr, ok := poolErr.(*gophercloud.UnexpectedResponseCodeError)
if !ok || detailedErr.Actual != http.StatusNotFound {
return poolErr
}
Review comment (Member):
Huh, it looks like I'm not deleting the monitor object at this point - that's a bug.

}
Review comment:
Same nit as above.

poolExists := (poolErr == nil)

// Have to delete VIP before pool can be deleted
err = vips.Delete(lb.network, vip.ID).ExtractErr()
if err != nil {
// We have to delete the VIP before the pool can be deleted, so we can't
// continue on if this fails.
// TODO(#8352): Only do this if the VIP exists once we can delete pools by
// name rather than by ID.
err := vips.Delete(lb.network, vip.ID).ExtractErr()
if err != nil && err != ErrNotFound {
return err
}

// Ignore errors for everything following here

for _, monId := range pool.MonitorIDs {
pools.DisassociateMonitor(lb.network, pool.ID, monId)
if poolExists {
for _, monId := range pool.MonitorIDs {
// TODO(#8352): Delete the monitor, don't just disassociate it.
pools.DisassociateMonitor(lb.network, pool.ID, monId)
}
pools.Delete(lb.network, pool.ID)
Review comment:
I think that you need to return an error here if the pool deletion fails, so that you will retry again later?
I'm pretty sure that given the eventual consistency of the underlying GCE, a significant number of these pool deletions are going to fail because the monitor disassociation will not have completed under the hood yet.

Review comment (Contributor, author):
I'd think so as well, but I don't know why @anguslees originally wrote it this way, so I'm hesitant to change it, given that it looks to have been an intentional choice. It'd be nice if he could chime in, but otherwise I don't want to mess with it given that I have no way of testing it (since I don't have an openstack cluster).

Review comment:
My apologies - I'd overlooked the fact that this was openstack and not GCE.

Review comment (Member):
We need to delete the vip before the pool (otherwise the pool is still in use), and once the vip is deleted GetTCPLoadBalancerExists will indicate "does not exist", and we could potentially recreate another vip with the same name (and a new pool, also with the same name). My intention (and the current code) was just to leak the pool object during partial failures (the "ignore errors following here" comment could have been clearer).

Now that we're looking at retrying the delete operation, we should definitely disassociate (and delete!) the monitors before attempting to remove the vip (partial failures will lead to missing health checks, but if DeleteLoadBalancer is retried then eventually we might delete everything successfully). If the disassociate succeeds and the monitor delete fails, we're still leaking the monitor.

The pool is still awkward. It is possible to lookup the pool by name (rather than ID), but the name is not necessarily unique - if we have already recreated a new LoadBalancer with the same name then we could delete the wrong pool. With a bit more clarity around how k8s will interpret "Get" and "Create" when "Delete" fails we could assume the pool name is unique and use that to retry deleting the pool.

Less desirable alternatives might include trying to "unwind" and recreate the vip if there were failures deleting the pool (yuck); using OpenStack metadata to associate pool+monitor to original LB (I don't modify metadata anywhere else); or modifying k8s to allow cloud providers to store resource metadata somewhere within k8s (bigger change, but could also lead to performance improvements with object lookup).

Review comment (Contributor, author):
Since a couple weeks ago, load balancers have been named using their service's UID, so there will never be name conflicts that enable reusing old pools. Given that, would you be ok with switching over to getting the pool by name? This would make the OpenStack behavior almost identical to the GCE behavior, meaning it'll be less likely to break in weird ways in the future.

In the meantime, I can have this function gracefully return nil if the vip doesn't exist. Once you've switched the pool lookups to be by name, we can go back to trying to delete the pool even if the vip has already been deleted.

How does that sound? I'll open up an issue for doing this work and for cleaning up the monitors that aren't currently being deleted.

Review comment (Member):
> Since a couple weeks ago, load balancers have been named using their service's UID, so there will never be name conflicts that enable reusing old pools.

(thinking)

What about:

  • Create() creates pool successfully, fails to create vip, fails to delete new pool during unwind
  • Create() now gets called again.

Do we now have a duplicate pool? Or will Create() call Delete repeatedly in some way before reattempting the Create?

Review comment (Contributor, author):
The model I've used in the past (and now in the GCE implementation) is to ensure idempotency of each individual step. If we attempt to create a pool and one already exists with that name, we consider it a success and continue to the next step. That way, the service controller can keep retrying until the creation succeeds.

Effectively, we'd just have to check the return code from the pool creation attempt, and if it's an already-exists error, consider it a success.

Review comment (Member):
(I'm really not trying to be awkward here)

OpenStack resource names are not unique, so we won't get an already-exists error at the underlying level. We will have to search first, and then create - which is easy enough, but does leave a small race window that could result in duplicate pools.

I'm ok with all this btw. I think there's still some hairy corner cases involving k8s restarts/etc that will result in odd/leaked resources, but I agree that the proposed change is better than the existing logic.

Review comment (Contributor, author):
Ah, I didn't realize that about Openstack. It does make this strategy a little less effective. I'm definitely open to alternative implementations, I just want to reduce the likelihood of leaked resources.

So long as having duplicate pools around doesn't break functionality, I'd expect we could also avoid stranding resources even in that rare case by deleting all pools with the UID-derived name when deleting the LB.

}
pools.Delete(lb.network, pool.ID)

return nil
}
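Following up on the TODO(#8352) discussion above, here is a hedged sketch of what "delete the pool by name" could look like once load balancer names are UID-derived. It is not part of this PR: the gophercloud list/extract/delete calls are assumed to match the lbaas v1 API already used in this file, and the import paths are assumed from the vendored gophercloud of that era.

package example

import (
	"github.com/rackspace/gophercloud"
	"github.com/rackspace/gophercloud/openstack/networking/v2/extensions/lbaas/pools"
	"github.com/rackspace/gophercloud/pagination"
)

// deletePoolsByName deletes every pool whose name matches the UID-derived
// load balancer name, so the pool is no longer leaked when the VIP is
// already gone. Deleting all matches also mops up any duplicates left by the
// search-then-create race mentioned in the discussion.
func deletePoolsByName(network *gophercloud.ServiceClient, name string) error {
	return pools.List(network, pools.ListOpts{Name: name}).EachPage(func(page pagination.Page) (bool, error) {
		poolList, err := pools.ExtractPools(page)
		if err != nil {
			return false, err
		}
		for _, p := range poolList {
			if err := pools.Delete(network, p.ID).ExtractErr(); err != nil {
				return false, err // propagate so the whole delete is retried
			}
		}
		return true, nil
	})
}

The create side would pair with this by searching for an existing pool by name before creating one, as discussed above, accepting the small race window that OpenStack's non-unique names leave open.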
23 changes: 3 additions & 20 deletions pkg/cloudprovider/servicecontroller/servicecontroller.go
@@ -207,7 +207,7 @@ func (s *ServiceController) processDelta(delta *cache.Delta) (error, bool) {
cachedService.service = service
s.cache.set(namespacedName.String(), cachedService)
case cache.Deleted:
err := s.ensureLBDeleted(service)
err := s.balancer.EnsureTCPLoadBalancerDeleted(s.loadBalancerName(service), s.zone.Region)
if err != nil {
return err, retryable
}
@@ -230,7 +230,7 @@ func (s *ServiceController) createLoadBalancerIfNeeded(namespacedName types.Name
// we can recreate it cleanly.
if cachedService.Spec.CreateExternalLoadBalancer {
glog.Infof("Deleting existing load balancer for service %s that needs an updated load balancer.", namespacedName)
if err := s.ensureLBDeleted(cachedService); err != nil {
if err := s.balancer.EnsureTCPLoadBalancerDeleted(s.loadBalancerName(cachedService), s.zone.Region); err != nil {
return err, retryable
}
}
@@ -251,7 +251,7 @@ } else if exists {
} else if exists {
glog.Infof("Deleting old LB for previously uncached service %s whose endpoint %s doesn't match the service's desired IPs %v",
namespacedName, endpoint, service.Spec.PublicIPs)
if err := s.ensureLBDeleted(service); err != nil {
if err := s.balancer.EnsureTCPLoadBalancerDeleted(s.loadBalancerName(service), s.zone.Region); err != nil {
return err, retryable
}
}
@@ -344,23 +344,6 @@ func (s *ServiceController) createExternalLoadBalancer(service *api.Service) err
return nil
}

// Ensures that the load balancer associated with the given service is deleted,
// doing the deletion if necessary. Should always be retried upon failure.
func (s *ServiceController) ensureLBDeleted(service *api.Service) error {
// This is only needed because not all delete load balancer implementations
// are currently idempotent to the LB not existing.
if _, exists, err := s.balancer.GetTCPLoadBalancer(s.loadBalancerName(service), s.zone.Region); err != nil {
return err
} else if !exists {
return nil
}

if err := s.balancer.DeleteTCPLoadBalancer(s.loadBalancerName(service), s.zone.Region); err != nil {
return err
}
return nil
}

// ListKeys implements the interface required by DeltaFIFO to list the keys we
// already know about.
func (s *serviceCache) ListKeys() []string {