diff --git a/hack/.golint_failures b/hack/.golint_failures
index 09f0f40a4d8a..d65f45fec6fc 100644
--- a/hack/.golint_failures
+++ b/hack/.golint_failures
@@ -640,6 +640,7 @@ test/e2e/autoscaling
 test/e2e/chaosmonkey
 test/e2e/common
 test/e2e/framework
+test/e2e/framework/providers/gce
 test/e2e/lifecycle
 test/e2e/lifecycle/bootstrap
 test/e2e/network
diff --git a/test/e2e/common/util.go b/test/e2e/common/util.go
index 9af3d2f2c936..63e821e3b30a 100644
--- a/test/e2e/common/util.go
+++ b/test/e2e/common/util.go
@@ -22,7 +22,7 @@ import (
 	"text/template"
 	"time"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/util/intstr"
 	"k8s.io/apimachinery/pkg/util/sets"
@@ -172,7 +172,7 @@ func RestartNodes(c clientset.Interface, nodes []v1.Node) error {
 	// Wait for their boot IDs to change.
 	for i := range nodes {
 		node := &nodes[i]
-		if err := wait.Poll(30*time.Second, 5*time.Minute, func() (bool, error) {
+		if err := wait.Poll(30*time.Second, framework.RestartNodeReadyAgainTimeout, func() (bool, error) {
 			newNode, err := c.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
 			if err != nil {
 				return false, fmt.Errorf("error getting node info after reboot: %s", err)
diff --git a/test/e2e/framework/BUILD b/test/e2e/framework/BUILD
index 7b3142f6bc37..2bf0c43aad82 100644
--- a/test/e2e/framework/BUILD
+++ b/test/e2e/framework/BUILD
@@ -58,6 +58,7 @@ go_library(
         "//pkg/kubelet/dockershim/metrics:go_default_library",
         "//pkg/kubelet/events:go_default_library",
         "//pkg/kubelet/metrics:go_default_library",
+        "//pkg/kubelet/pod:go_default_library",
         "//pkg/kubelet/sysctl:go_default_library",
         "//pkg/kubelet/util/format:go_default_library",
         "//pkg/master/ports:go_default_library",
diff --git a/test/e2e/framework/providers/gce/BUILD b/test/e2e/framework/providers/gce/BUILD
index ac4dbf0dbec3..4e37f01426aa 100644
--- a/test/e2e/framework/providers/gce/BUILD
+++ b/test/e2e/framework/providers/gce/BUILD
@@ -6,6 +6,8 @@ go_library(
         "firewall.go",
         "gce.go",
         "ingress.go",
+        "recreate_node.go",
+        "util.go",
     ],
     importpath = "k8s.io/kubernetes/test/e2e/framework/providers/gce",
     visibility = ["//visibility:public"],
@@ -13,13 +15,17 @@ go_library(
         "//pkg/cloudprovider/providers/gce:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
+        "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
         "//staging/src/k8s.io/cloud-provider:go_default_library",
         "//test/e2e/framework:go_default_library",
+        "//test/utils:go_default_library",
         "//vendor/github.com/onsi/ginkgo:go_default_library",
+        "//vendor/github.com/onsi/gomega:go_default_library",
         "//vendor/google.golang.org/api/compute/v1:go_default_library",
         "//vendor/google.golang.org/api/googleapi:go_default_library",
         "//vendor/k8s.io/utils/exec:go_default_library",
diff --git a/test/e2e/framework/providers/gce/recreate_node.go b/test/e2e/framework/providers/gce/recreate_node.go
new file mode 100644
index 000000000000..06a4c2e90c40
--- /dev/null
+++ b/test/e2e/framework/providers/gce/recreate_node.go
@@ -0,0 +1,131 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package gce
+
+import (
+	"fmt"
+	"time"
+
+	. "github.com/onsi/ginkgo"
+	. "github.com/onsi/gomega"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/fields"
+	"k8s.io/apimachinery/pkg/labels"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/kubernetes/test/e2e/framework"
+	testutils "k8s.io/kubernetes/test/utils"
+)
+
+func nodeNames(nodes []v1.Node) []string {
+	result := make([]string, 0, len(nodes))
+	for i := range nodes {
+		result = append(result, nodes[i].Name)
+	}
+	return result
+}
+
+func podNames(pods []v1.Pod) []string {
+	result := make([]string, 0, len(pods))
+	for i := range pods {
+		result = append(result, pods[i].Name)
+	}
+	return result
+}
+
+var _ = Describe("Recreate [Feature:Recreate]", func() {
+	f := framework.NewDefaultFramework("recreate")
+	var originalNodes []v1.Node
+	var originalPodNames []string
+	var ps *testutils.PodStore
+	systemNamespace := metav1.NamespaceSystem
+	BeforeEach(func() {
+		framework.SkipUnlessProviderIs("gce", "gke")
+		var err error
+		numNodes, err := framework.NumberOfRegisteredNodes(f.ClientSet)
+		Expect(err).NotTo(HaveOccurred())
+		originalNodes, err = framework.CheckNodesReady(f.ClientSet, numNodes, framework.NodeReadyInitialTimeout)
+		Expect(err).NotTo(HaveOccurred())
+
+		framework.Logf("Got the following nodes before recreate: %v", nodeNames(originalNodes))
+
+		ps, err = testutils.NewPodStore(f.ClientSet, systemNamespace, labels.Everything(), fields.Everything())
+		Expect(err).NotTo(HaveOccurred())
+		allPods := ps.List()
+		originalPods := framework.FilterNonRestartablePods(allPods)
+		originalPodNames = make([]string, len(originalPods))
+		for i, p := range originalPods {
+			originalPodNames[i] = p.ObjectMeta.Name
+		}
+
+		if !framework.CheckPodsRunningReadyOrSucceeded(f.ClientSet, systemNamespace, originalPodNames, framework.PodReadyBeforeTimeout) {
+			framework.Failf("At least one pod wasn't running and ready or succeeded at test start.")
+		}
+	})
+
+	AfterEach(func() {
+		if CurrentGinkgoTestDescription().Failed {
+			// Addon/system pods are expected to stay running, so on failure
+			// dump events for the kube-system namespace to help debugging.
+			By(fmt.Sprintf("Collecting events from namespace %q.", systemNamespace))
+			events, err := f.ClientSet.CoreV1().Events(systemNamespace).List(metav1.ListOptions{})
+			Expect(err).NotTo(HaveOccurred())
+
+			for _, e := range events.Items {
+				framework.Logf("event for %v: %v %v: %v", e.InvolvedObject.Name, e.Source, e.Reason, e.Message)
+			}
+		}
+		if ps != nil {
+			ps.Stop()
+		}
+	})
+
+	It("recreate nodes and ensure they function upon restart", func() {
+		testRecreate(f.ClientSet, ps, systemNamespace, originalNodes, originalPodNames)
+	})
+})
+
+// testRecreate recreates all the nodes in the test instance group and waits
+// for the cluster to become healthy again.
+func testRecreate(c clientset.Interface, ps *testutils.PodStore, systemNamespace string, nodes []v1.Node, podNames []string) {
+	err := recreateNodes(c, nodes)
+	if err != nil {
+		framework.Failf("Test failed; failed to start the recreate instance group command.")
+	}
+
+	err = waitForNodeBootIdsToChange(c, nodes, framework.RecreateNodeReadyAgainTimeout)
+	if err != nil {
+		framework.Failf("Test failed; failed to recreate at least one node in %v.", framework.RecreateNodeReadyAgainTimeout)
+	}
+
+	nodesAfter, err := framework.CheckNodesReady(c, len(nodes), framework.RestartNodeReadyAgainTimeout)
+	Expect(err).NotTo(HaveOccurred())
+	framework.Logf("Got the following nodes after recreate: %v", nodeNames(nodesAfter))
+
+	if len(nodes) != len(nodesAfter) {
+		framework.Failf("Had %d nodes before nodes were recreated, but now only have %d",
+			len(nodes), len(nodesAfter))
+	}
+
+	// Make sure the pods from before node recreation are running/completed.
+	podCheckStart := time.Now()
+	podNamesAfter, err := framework.WaitForNRestartablePods(ps, len(podNames), framework.RestartPodReadyAgainTimeout)
+	Expect(err).NotTo(HaveOccurred())
+	remaining := framework.RestartPodReadyAgainTimeout - time.Since(podCheckStart)
+	if !framework.CheckPodsRunningReadyOrSucceeded(c, systemNamespace, podNamesAfter, remaining) {
+		framework.Failf("At least one pod wasn't running and ready after the restart.")
+	}
+}
diff --git a/test/e2e/framework/providers/gce/util.go b/test/e2e/framework/providers/gce/util.go
new file mode 100644
index 000000000000..1eaf138e16b2
--- /dev/null
+++ b/test/e2e/framework/providers/gce/util.go
@@ -0,0 +1,91 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package gce
+
+import (
+	"fmt"
+	"strings"
+	"time"
+
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/wait"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/kubernetes/test/e2e/framework"
+)
+
+// recreateNodes recreates the given nodes in their managed instance group.
+func recreateNodes(c clientset.Interface, nodes []v1.Node) error {
+	// Build a mapping from zone to the names of the nodes in that zone.
+	nodeNamesByZone := make(map[string][]string)
+	for i := range nodes {
+		node := &nodes[i]
+		zone := framework.TestContext.CloudConfig.Zone
+		if z, ok := node.Labels[v1.LabelZoneFailureDomain]; ok {
+			zone = z
+		}
+		nodeNamesByZone[zone] = append(nodeNamesByZone[zone], node.Name)
+	}
+
+	// Find the sole managed instance group name.
+	if strings.Contains(framework.TestContext.CloudConfig.NodeInstanceGroup, ",") {
+		return fmt.Errorf("test does not support cluster setup with more than one managed instance group: %s", framework.TestContext.CloudConfig.NodeInstanceGroup)
+	}
+	instanceGroup := framework.TestContext.CloudConfig.NodeInstanceGroup
+
+	// Recreate the nodes, zone by zone.
+	for zone, nodeNames := range nodeNamesByZone {
+		args := []string{
+			"compute",
+			fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
+			"instance-groups",
+			"managed",
+			"recreate-instances",
+			instanceGroup,
+		}
+
+		args = append(args, fmt.Sprintf("--instances=%s", strings.Join(nodeNames, ",")))
+		args = append(args, fmt.Sprintf("--zone=%s", zone))
+		framework.Logf("Recreating instance group %s.", instanceGroup)
+		stdout, stderr, err := framework.RunCmd("gcloud", args...)
+		if err != nil {
+			return fmt.Errorf("error recreating nodes: %s\nstdout: %s\nstderr: %s", err, stdout, stderr)
+		}
+	}
+	return nil
+}
+
+// waitForNodeBootIdsToChange waits until the boot ID of every given node has
+// changed, which indicates that the node was actually recreated.
+func waitForNodeBootIdsToChange(c clientset.Interface, nodes []v1.Node, timeout time.Duration) error {
+	errMsg := []string{}
+	for i := range nodes {
+		node := &nodes[i]
+		if err := wait.Poll(30*time.Second, timeout, func() (bool, error) {
+			newNode, err := c.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
+			if err != nil {
+				framework.Logf("Could not get node info: %s. Retrying in %v.", err, 30*time.Second)
+				return false, nil
+			}
+			return node.Status.NodeInfo.BootID != newNode.Status.NodeInfo.BootID, nil
+		}); err != nil {
+			errMsg = append(errMsg, fmt.Sprintf("error waiting for node %s boot ID to change: %s", node.Name, err.Error()))
+		}
+	}
+	if len(errMsg) > 0 {
+		return fmt.Errorf("%s", strings.Join(errMsg, ","))
+	}
+	return nil
+}
diff --git a/test/e2e/framework/util.go b/test/e2e/framework/util.go
index a4627025d695..2617469e87df 100644
--- a/test/e2e/framework/util.go
+++ b/test/e2e/framework/util.go
@@ -85,6 +85,7 @@ import (
 	nodectlr "k8s.io/kubernetes/pkg/controller/nodelifecycle"
 	"k8s.io/kubernetes/pkg/controller/service"
 	"k8s.io/kubernetes/pkg/features"
+	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
 	"k8s.io/kubernetes/pkg/kubelet/util/format"
 	"k8s.io/kubernetes/pkg/master/ports"
 	"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
@@ -177,6 +178,10 @@ const (
 	// How long PVs have to become deleted
 	PVDeletingTimeout = 3 * time.Minute
 
+	// How long a node is allowed to become "Ready" after it is recreated before
+	// the test is considered failed.
+	RecreateNodeReadyAgainTimeout = 10 * time.Minute
+
 	// How long a node is allowed to become "Ready" after it is restarted before
 	// the test is considered failed.
 	RestartNodeReadyAgainTimeout = 5 * time.Minute
@@ -3304,6 +3309,55 @@ func WaitForPodsReady(c clientset.Interface, ns, name string, minReadySeconds in
 	})
 }
 
+// WaitForNRestartablePods tries to list restartable pods using ps until it finds expect of them,
+// returning their names if it can do so before timeout.
+func WaitForNRestartablePods(ps *testutils.PodStore, expect int, timeout time.Duration) ([]string, error) {
+	var pods []*v1.Pod
+	var errLast error
+	found := wait.Poll(Poll, timeout, func() (bool, error) {
+		allPods := ps.List()
+		pods = FilterNonRestartablePods(allPods)
+		if len(pods) != expect {
+			errLast = fmt.Errorf("expected to find %d pods but found only %d", expect, len(pods))
+			Logf("Error getting pods: %v", errLast)
+			return false, nil
+		}
+		return true, nil
+	}) == nil
+	podNames := make([]string, len(pods))
+	for i, p := range pods {
+		podNames[i] = p.ObjectMeta.Name
+	}
+	if !found {
+		return podNames, fmt.Errorf("couldn't find %d pods within %v; last error: %v",
+			expect, timeout, errLast)
+	}
+	return podNames, nil
+}
+
+// FilterNonRestartablePods filters out pods that will never get recreated if deleted after termination.
+func FilterNonRestartablePods(pods []*v1.Pod) []*v1.Pod {
+	var results []*v1.Pod
+	for _, p := range pods {
+		if isNotRestartAlwaysMirrorPod(p) {
+			// Mirror pods with restart policy == Never will not get
+			// recreated if they are deleted after the pods have
+			// terminated. For now, we discount such pods.
+			// https://github.com/kubernetes/kubernetes/issues/34003
+			continue
+		}
+		results = append(results, p)
+	}
+	return results
+}
+
+func isNotRestartAlwaysMirrorPod(p *v1.Pod) bool {
+	if !kubepod.IsMirrorPod(p) {
+		return false
+	}
+	return p.Spec.RestartPolicy != v1.RestartPolicyAlways
+}
+
 // Waits for the number of events on the given object to reach a desired count.
 func WaitForEvents(c clientset.Interface, ns string, objOrRef runtime.Object, desiredEventsCount int) error {
 	return wait.Poll(Poll, 5*time.Minute, func() (bool, error) {
diff --git a/test/e2e/lifecycle/BUILD b/test/e2e/lifecycle/BUILD
index 24f1c8c9fbd1..0b7c5f1cf6e5 100644
--- a/test/e2e/lifecycle/BUILD
+++ b/test/e2e/lifecycle/BUILD
@@ -21,7 +21,6 @@ go_library(
     importpath = "k8s.io/kubernetes/test/e2e/lifecycle",
     deps = [
         "//pkg/apis/core:go_default_library",
-        "//pkg/kubelet/pod:go_default_library",
         "//pkg/master/ports:go_default_library",
         "//staging/src/k8s.io/api/core/v1:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
@@ -29,7 +28,6 @@ go_library(
         "//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
         "//staging/src/k8s.io/apimachinery/pkg/util/version:go_default_library",
-        "//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
         "//staging/src/k8s.io/client-go/discovery:go_default_library",
         "//staging/src/k8s.io/client-go/kubernetes:go_default_library",
         "//test/e2e/chaosmonkey:go_default_library",
diff --git a/test/e2e/lifecycle/restart.go b/test/e2e/lifecycle/restart.go
index 4508c05c6dc5..1903c2cb07f1 100644
--- a/test/e2e/lifecycle/restart.go
+++ b/test/e2e/lifecycle/restart.go
@@ -17,15 +17,12 @@ limitations under the License.
 package lifecycle
 
 import (
-	"fmt"
 	"time"
 
-	"k8s.io/api/core/v1"
+	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/fields"
 	"k8s.io/apimachinery/pkg/labels"
-	"k8s.io/apimachinery/pkg/util/wait"
-	kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
 	"k8s.io/kubernetes/test/e2e/common"
 	"k8s.io/kubernetes/test/e2e/framework"
 	testutils "k8s.io/kubernetes/test/utils"
@@ -34,28 +31,6 @@ import (
 	. "github.com/onsi/gomega"
 )
 
-func isNotRestartAlwaysMirrorPod(p *v1.Pod) bool {
-	if !kubepod.IsMirrorPod(p) {
-		return false
-	}
-	return p.Spec.RestartPolicy != v1.RestartPolicyAlways
-}
-
-func filterIrrelevantPods(pods []*v1.Pod) []*v1.Pod {
-	var results []*v1.Pod
-	for _, p := range pods {
-		if isNotRestartAlwaysMirrorPod(p) {
-			// Mirror pods with restart policy == Never will not get
-			// recreated if they are deleted after the pods have
-			// terminated. For now, we discount such pods.
-			// https://github.com/kubernetes/kubernetes/issues/34003
-			continue
-		}
-		results = append(results, p)
-	}
-	return results
-}
-
 func nodeNames(nodes []v1.Node) []string {
 	result := make([]string, 0, len(nodes))
 	for i := range nodes {
@@ -90,7 +65,7 @@ var _ = SIGDescribe("Restart [Disruptive]", func() {
 
 		By("ensuring all pods are running and ready")
 		allPods := ps.List()
-		pods := filterIrrelevantPods(allPods)
+		pods := framework.FilterNonRestartablePods(allPods)
 
 		originalPodNames = make([]string, len(pods))
 		for i, p := range pods {
@@ -131,7 +106,7 @@ var _ = SIGDescribe("Restart [Disruptive]", func() {
 		// across node restarts.
By("ensuring the same number of pods are running and ready after restart") podCheckStart := time.Now() - podNamesAfter, err := waitForNPods(ps, len(originalPodNames), framework.RestartPodReadyAgainTimeout) + podNamesAfter, err := framework.WaitForNRestartablePods(ps, len(originalPodNames), framework.RestartPodReadyAgainTimeout) Expect(err).NotTo(HaveOccurred()) remaining := framework.RestartPodReadyAgainTimeout - time.Since(podCheckStart) if !framework.CheckPodsRunningReadyOrSucceeded(f.ClientSet, systemNamespace, podNamesAfter, remaining) { @@ -141,31 +116,3 @@ var _ = SIGDescribe("Restart [Disruptive]", func() { } }) }) - -// waitForNPods tries to list pods using c until it finds expect of them, -// returning their names if it can do so before timeout. -func waitForNPods(ps *testutils.PodStore, expect int, timeout time.Duration) ([]string, error) { - // Loop until we find expect pods or timeout is passed. - var pods []*v1.Pod - var errLast error - found := wait.Poll(framework.Poll, timeout, func() (bool, error) { - allPods := ps.List() - pods = filterIrrelevantPods(allPods) - if len(pods) != expect { - errLast = fmt.Errorf("expected to find %d pods but found only %d", expect, len(pods)) - framework.Logf("Error getting pods: %v", errLast) - return false, nil - } - return true, nil - }) == nil - // Extract the names of all found pods. - podNames := make([]string, len(pods)) - for i, p := range pods { - podNames[i] = p.ObjectMeta.Name - } - if !found { - return podNames, fmt.Errorf("couldn't find %d pods within %v; last error: %v", - expect, timeout, errLast) - } - return podNames, nil -}