Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix failing service e2e due to execPod unavailability #80805

Merged
merged 1 commit into from Aug 2, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
47 changes: 34 additions & 13 deletions test/e2e/network/service.go
Expand Up @@ -1474,6 +1474,13 @@ var _ = SIGDescribe("Services", func() {
// This container is an nginx container listening on port 80
// See kubernetes/contrib/ingress/echoheaders/nginx.conf for content of response
jig.RunOrFail(namespace, nil)
var err error
// Make sure acceptPod is running. There is a chance that the pod might be terminated for unexpected reasons.
acceptPod, err = cs.CoreV1().Pods(namespace).Get(acceptPod.Name, metav1.GetOptions{})
framework.ExpectNoError(err, "Unable to get pod %s", acceptPod.Name)
framework.ExpectEqual(acceptPod.Status.Phase, v1.PodRunning)
framework.ExpectNotEqual(acceptPod.Status.PodIP, "")

// Create loadbalancer service with source range from node[0] and podAccept
svc := jig.CreateTCPServiceOrFail(namespace, func(svc *v1.Service) {
svc.Spec.Type = v1.ServiceTypeLoadBalancer
Expand Down Expand Up @@ -1503,6 +1510,12 @@ var _ = SIGDescribe("Services", func() {
framework.CheckReachabilityFromPod(true, loadBalancerLagTimeout, namespace, acceptPod.Name, svcIP)
framework.CheckReachabilityFromPod(false, normalReachabilityTimeout, namespace, dropPod.Name, svcIP)

// Make sure dropPod is running. There is a chance that the pod might be terminated for unexpected reasons.
dropPod, err = cs.CoreV1().Pods(namespace).Get(dropPod.Name, metav1.GetOptions{})
framework.ExpectNoError(err, "Unable to get pod %s", dropPod.Name)
framework.ExpectEqual(acceptPod.Status.Phase, v1.PodRunning)
framework.ExpectNotEqual(acceptPod.Status.PodIP, "")

ginkgo.By("Update service LoadBalancerSourceRange and check reachability")
jig.UpdateServiceOrFail(svc.Namespace, svc.Name, func(svc *v1.Service) {
// only allow access from dropPod
Expand Down Expand Up @@ -2159,10 +2172,10 @@ var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() {
})

ginkgo.It("should work from pods", func() {
var err error
namespace := f.Namespace.Name
serviceName := "external-local-pods"
jig := e2eservice.NewTestJig(cs, serviceName)
nodes := jig.GetNodes(e2eservice.MaxNodesForEndpointsTests)

svc := jig.CreateOnlyLocalLoadBalancerService(namespace, serviceName, loadBalancerCreateTimeout, true, nil)
serviceLBNames = append(serviceLBNames, cloudprovider.DefaultLoadBalancerName(svc))
Expand All @@ -2176,33 +2189,41 @@ var _ = SIGDescribe("ESIPP [Slow] [DisabledForLargeClusters]", func() {
port := strconv.Itoa(int(svc.Spec.Ports[0].Port))
ipPort := net.JoinHostPort(ingressIP, port)
path := fmt.Sprintf("%s/clientip", ipPort)
nodeName := nodes.Items[0].Name
podName := "execpod-sourceip"

ginkgo.By(fmt.Sprintf("Creating %v on node %v", podName, nodeName))
execPod := e2epod.CreateExecPodOrFail(f.ClientSet, namespace, podName, func(pod *v1.Pod) {
pod.Spec.NodeName = nodeName
})
ginkgo.By("Creating pause pod deployment to make sure, pausePods are in desired state")
deployment := jig.CreatePausePodDeployment("pause-pod-deployment", namespace, int32(1))
framework.ExpectNoError(e2edeploy.WaitForDeploymentComplete(cs, deployment), "Failed to complete pause pod deployment")

defer func() {
err := cs.CoreV1().Pods(namespace).Delete(execPod.Name, nil)
framework.ExpectNoError(err, "failed to delete pod: %s", execPod.Name)
e2elog.Logf("Deleting deployment")
err = cs.AppsV1().Deployments(namespace).Delete(deployment.Name, &metav1.DeleteOptions{})
framework.ExpectNoError(err, "Failed to delete deployment %s", deployment.Name)
}()

deployment, err = cs.AppsV1().Deployments(namespace).Get(deployment.Name, metav1.GetOptions{})
framework.ExpectNoError(err, "Error in retriving pause pod deployment")
labelSelector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We might want to add framework.ExpectNoError(err) after the previous call.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't sure exactly what this suggestion referred to, but I added framework.ExpectNoError(err) after the metav1.LabelSelectorAsSelector statement, along with other improvements to the error-checking statements.

framework.ExpectNoError(err, "Error in setting LabelSelector as selector from deployment")

pausePods, err := cs.CoreV1().Pods(namespace).List(metav1.ListOptions{LabelSelector: labelSelector.String()})
framework.ExpectNoError(err, "Error in listing pods associated with pause pod deployments")

pausePod := pausePods.Items[0]
e2elog.Logf("Waiting up to %v curl %v", e2eservice.KubeProxyLagTimeout, path)
cmd := fmt.Sprintf(`curl -q -s --connect-timeout 30 %v`, path)

var srcIP string
ginkgo.By(fmt.Sprintf("Hitting external lb %v from pod %v on node %v", ingressIP, podName, nodeName))
ginkgo.By(fmt.Sprintf("Hitting external lb %v from pod %v on node %v", ingressIP, pausePod.Name, pausePod.Spec.NodeName))
if pollErr := wait.PollImmediate(framework.Poll, e2eservice.LoadBalancerCreateTimeoutDefault, func() (bool, error) {
stdout, err := framework.RunHostCmd(execPod.Namespace, execPod.Name, cmd)
stdout, err := framework.RunHostCmd(pausePod.Namespace, pausePod.Name, cmd)
if err != nil {
e2elog.Logf("got err: %v, retry until timeout", err)
return false, nil
}
srcIP = strings.TrimSpace(strings.Split(stdout, ":")[0])
return srcIP == execPod.Status.PodIP, nil
return srcIP == pausePod.Status.PodIP, nil
}); pollErr != nil {
e2elog.Failf("Source IP not preserved from %v, expected '%v' got '%v'", podName, execPod.Status.PodIP, srcIP)
e2elog.Failf("Source IP not preserved from %v, expected '%v' got '%v'", pausePod.Name, pausePod.Status.PodIP, srcIP)
}
})

Expand Down