Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

always clean gce resources in service e2e #32183

Merged
merged 2 commits into from
Oct 26, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 17 additions & 0 deletions pkg/cloudprovider/providers/gce/gce.go
Original file line number Diff line number Diff line change
Expand Up @@ -1446,6 +1446,23 @@ func (gce *GCECloud) deleteForwardingRule(name, region string) error {
return nil
}

func (gce *GCECloud) DeleteForwardingRule(name string) error {
region, err := GetGCERegion(gce.localZone)
if err != nil {
return err
}
return gce.deleteForwardingRule(name, region)
}

// DeleteTargetPool deletes the given target pool.
func (gce *GCECloud) DeleteTargetPool(name string, hc *compute.HttpHealthCheck) error {
region, err := GetGCERegion(gce.localZone)
if err != nil {
return err
}
return gce.deleteTargetPool(name, region, hc)
}

func (gce *GCECloud) deleteTargetPool(name, region string, hc *compute.HttpHealthCheck) error {
op, err := gce.service.TargetPools.Delete(gce.projectID, region, name).Do()
if err != nil && isHTTPErrorCode(err, http.StatusNotFound) {
Expand Down
13 changes: 13 additions & 0 deletions test/e2e/framework/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4582,3 +4582,16 @@ func (p *E2ETestNodePreparer) CleanupNodes() error {
}
return encounteredError
}

func CleanupGCEResources(loadBalancerName string) (err error) {
gceCloud, ok := TestContext.CloudConfig.Provider.(*gcecloud.GCECloud)
if !ok {
return fmt.Errorf("failed to convert CloudConfig.Provider to GCECloud: %#v", TestContext.CloudConfig.Provider)
}
gceCloud.DeleteFirewall(loadBalancerName)
gceCloud.DeleteForwardingRule(loadBalancerName)
gceCloud.DeleteGlobalStaticIP(loadBalancerName)
hc, _ := gceCloud.GetHttpHealthCheck(loadBalancerName)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we create all health checks with the same name? I can have multiple nodePorts/health checks per service right?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

HTTP health check name == LB name. We allocate just a single healthCheckNodePort per service today (for ESIPP services).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this line is correct, right? i'd keep it for now.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah it's correct, I assuemd we were doing a health check per taretPort. Girish is verifying the case of one targetPort group failing health checks and another passing, and how that manifests with a single health check per service.

gceCloud.DeleteTargetPool(loadBalancerName, hc)
return nil
}
64 changes: 64 additions & 0 deletions test/e2e/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,24 @@ var _ = framework.KubeDescribe("Services", func() {
f := framework.NewDefaultFramework("services")

var cs clientset.Interface
serviceLBNames := []string{}

BeforeEach(func() {
cs = f.ClientSet
})

AfterEach(func() {
if CurrentGinkgoTestDescription().Failed {
describeSvc(f.Namespace.Name)
}
for _, lb := range serviceLBNames {
framework.Logf("cleaning gce resource for %s", lb)
cleanupServiceGCEResources(lb)
}
//reset serviceLBNames
serviceLBNames = []string{}
})

// TODO: We get coverage of TCP/UDP and multi-port services through the DNS test. We should have a simpler test for multi-port TCP here.

It("should provide secure master service [Conformance]", func() {
Expand Down Expand Up @@ -582,6 +595,10 @@ var _ = framework.KubeDescribe("Services", func() {
s.Spec.Type = api.ServiceTypeLoadBalancer
})
}
serviceLBNames = append(serviceLBNames, getLoadBalancerName(tcpService))
if loadBalancerSupportsUDP {
serviceLBNames = append(serviceLBNames, getLoadBalancerName(udpService))
}

By("waiting for the TCP service to have a load balancer")
// Wait for the load balancer to be created asynchronously
Expand Down Expand Up @@ -1083,6 +1100,7 @@ var _ = framework.KubeDescribe("ESIPP [Slow][Feature:ExternalTrafficLocalOnly]",
loadBalancerCreateTimeout := loadBalancerCreateTimeoutDefault

var cs clientset.Interface
serviceLBNames := []string{}

BeforeEach(func() {
// requires cloud load-balancer support - this feature currently supported only on GCE/GKE
Expand All @@ -1094,12 +1112,25 @@ var _ = framework.KubeDescribe("ESIPP [Slow][Feature:ExternalTrafficLocalOnly]",
}
})

AfterEach(func() {
if CurrentGinkgoTestDescription().Failed {
describeSvc(f.Namespace.Name)
}
for _, lb := range serviceLBNames {
framework.Logf("cleaning gce resource for %s", lb)
cleanupServiceGCEResources(lb)
}
//reset serviceLBNames
serviceLBNames = []string{}
})

It("should work for type=LoadBalancer [Slow][Feature:ExternalTrafficLocalOnly]", func() {
namespace := f.Namespace.Name
serviceName := "external-local"
jig := NewServiceTestJig(cs, serviceName)

svc := jig.createOnlyLocalLoadBalancerService(namespace, serviceName, loadBalancerCreateTimeout, true)
serviceLBNames = append(serviceLBNames, getLoadBalancerName(svc))
healthCheckNodePort := int(service.GetServiceHealthCheckNodePort(svc))
if healthCheckNodePort == 0 {
framework.Failf("Service HealthCheck NodePort was not allocated")
Expand Down Expand Up @@ -1165,6 +1196,7 @@ var _ = framework.KubeDescribe("ESIPP [Slow][Feature:ExternalTrafficLocalOnly]",
nodes := jig.getNodes(maxNodesForEndpointsTests)

svc := jig.createOnlyLocalLoadBalancerService(namespace, serviceName, loadBalancerCreateTimeout, false)
serviceLBNames = append(serviceLBNames, getLoadBalancerName(svc))
defer func() {
jig.ChangeServiceType(svc.Namespace, svc.Name, api.ServiceTypeClusterIP, loadBalancerCreateTimeout)
Expect(cs.Core().Services(svc.Namespace).Delete(svc.Name, nil)).NotTo(HaveOccurred())
Expand Down Expand Up @@ -1224,6 +1256,7 @@ var _ = framework.KubeDescribe("ESIPP [Slow][Feature:ExternalTrafficLocalOnly]",
nodes := jig.getNodes(maxNodesForEndpointsTests)

svc := jig.createOnlyLocalLoadBalancerService(namespace, serviceName, loadBalancerCreateTimeout, true)
serviceLBNames = append(serviceLBNames, getLoadBalancerName(svc))
defer func() {
jig.ChangeServiceType(svc.Namespace, svc.Name, api.ServiceTypeClusterIP, loadBalancerCreateTimeout)
Expect(cs.Core().Services(svc.Namespace).Delete(svc.Name, nil)).NotTo(HaveOccurred())
Expand Down Expand Up @@ -1272,6 +1305,7 @@ var _ = framework.KubeDescribe("ESIPP [Slow][Feature:ExternalTrafficLocalOnly]",
}

svc := jig.createOnlyLocalLoadBalancerService(namespace, serviceName, loadBalancerCreateTimeout, true)
serviceLBNames = append(serviceLBNames, getLoadBalancerName(svc))
defer func() {
jig.ChangeServiceType(svc.Namespace, svc.Name, api.ServiceTypeClusterIP, loadBalancerCreateTimeout)
Expect(cs.Core().Services(svc.Namespace).Delete(svc.Name, nil)).NotTo(HaveOccurred())
Expand Down Expand Up @@ -2697,3 +2731,33 @@ func execSourceipTest(f *framework.Framework, c clientset.Interface, ns, nodeNam
}
return execPod.Status.PodIP, outputs[1]
}

func getLoadBalancerName(service *api.Service) string {
//GCE requires that the name of a load balancer starts with a lower case letter.
ret := "a" + string(service.UID)
ret = strings.Replace(ret, "-", "", -1)
//AWS requires that the name of a load balancer is shorter than 32 bytes.
if len(ret) > 32 {
ret = ret[:32]
}
return ret
}

func cleanupServiceGCEResources(loadBalancerName string) {
if pollErr := wait.Poll(5*time.Second, lbCleanupTimeout, func() (bool, error) {
if err := framework.CleanupGCEResources(loadBalancerName); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the way we do this in ingress e2e is first poll on the resources and wait for the servicecontroller to delete them in, say, 5m. If this happens, test pass. Otherwise test fails, but at the end we delete the resources ourselves.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mfanjie I agree with @bprashanth - I see you have this called from AfterEac - I think we should not cleanup immediately. Lets delete the services, then keep polling to observe that servicecontroller cleans up after service deletion.
We cleanup only as a last resort and also fail the test if we have to cleanup.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the behavior you mentioned existing in current code.
if you look at https://github.com/kubernetes/kubernetes/blob/master/test/e2e/service.go#L748-L766, the last step of the it is changing the service type back to cluster ip and waiting for the LB deletion, so the leak is only possible when error occurs during the service LB type being changed, or some panic happened before that. (we might need wrap this block and put to defer).
In addition, we need to clean the related resource in case the above description does not behave correctly.
There are two service being set LB type for the whole service e2e, tcp service, udp service for mutability-test , and external-local service, and is not reused in different It(). Only these three service LB names are put to the string array, so it results same in AfterEach or in last resort.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we're adding a test that deletes an lb service, that's why i tagged Girish: https://github.com/kubernetes/kubernetes/pull/31991/files#diff-20a4e2095b63ecd60dd25e78bcd67372R1232

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually the test already exists and I think we just don't cleanup the lb (https://github.com/kubernetes/kubernetes/blob/master/test/e2e/service.go#L1087) but I didn't look too close, maybe we do in some aftereach block

framework.Logf("Still waiting for glbc to cleanup: %v", err)
return false, nil
}
return true, nil
}); pollErr != nil {
framework.Failf("Failed to cleanup service GCE resources.")
}
}

func describeSvc(ns string) {
framework.Logf("\nOutput of kubectl describe svc:\n")
desc, _ := framework.RunKubectl(
"describe", "svc", fmt.Sprintf("--namespace=%v", ns))
framework.Logf(desc)
}