Skip to content

Commit

Permalink
add recreate node e2e test
Browse files Browse the repository at this point in the history
Move recreate_node test to gce provider tests.
  • Loading branch information
jiayingz authored and chardch committed Apr 12, 2019
1 parent c4f97fa commit 8175579
Show file tree
Hide file tree
Showing 9 changed files with 289 additions and 60 deletions.
1 change: 1 addition & 0 deletions hack/.golint_failures
Expand Up @@ -640,6 +640,7 @@ test/e2e/autoscaling
test/e2e/chaosmonkey
test/e2e/common
test/e2e/framework
test/e2e/framework/providers/gce
test/e2e/lifecycle
test/e2e/lifecycle/bootstrap
test/e2e/network
Expand Down
4 changes: 2 additions & 2 deletions test/e2e/common/util.go
Expand Up @@ -22,7 +22,7 @@ import (
"text/template"
"time"

"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/apimachinery/pkg/util/sets"
Expand Down Expand Up @@ -172,7 +172,7 @@ func RestartNodes(c clientset.Interface, nodes []v1.Node) error {
// Wait for their boot IDs to change.
for i := range nodes {
node := &nodes[i]
if err := wait.Poll(30*time.Second, 5*time.Minute, func() (bool, error) {
if err := wait.Poll(30*time.Second, framework.RestartNodeReadyAgainTimeout, func() (bool, error) {
newNode, err := c.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
if err != nil {
return false, fmt.Errorf("error getting node info after reboot: %s", err)
Expand Down
1 change: 1 addition & 0 deletions test/e2e/framework/BUILD
Expand Up @@ -58,6 +58,7 @@ go_library(
"//pkg/kubelet/dockershim/metrics:go_default_library",
"//pkg/kubelet/events:go_default_library",
"//pkg/kubelet/metrics:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/kubelet/sysctl:go_default_library",
"//pkg/kubelet/util/format:go_default_library",
"//pkg/master/ports:go_default_library",
Expand Down
6 changes: 6 additions & 0 deletions test/e2e/framework/providers/gce/BUILD
Expand Up @@ -6,20 +6,26 @@ go_library(
"firewall.go",
"gce.go",
"ingress.go",
"recreate_node.go",
"util.go",
],
importpath = "k8s.io/kubernetes/test/e2e/framework/providers/gce",
visibility = ["//visibility:public"],
deps = [
"//pkg/cloudprovider/providers/gce:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/uuid:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//staging/src/k8s.io/cloud-provider:go_default_library",
"//test/e2e/framework:go_default_library",
"//test/utils:go_default_library",
"//vendor/github.com/onsi/ginkgo:go_default_library",
"//vendor/github.com/onsi/gomega:go_default_library",
"//vendor/google.golang.org/api/compute/v1:go_default_library",
"//vendor/google.golang.org/api/googleapi:go_default_library",
"//vendor/k8s.io/utils/exec:go_default_library",
Expand Down
131 changes: 131 additions & 0 deletions test/e2e/framework/providers/gce/recreate_node.go
@@ -0,0 +1,131 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gce

import (
"fmt"
"time"

. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
testutils "k8s.io/kubernetes/test/utils"
)

// nodeNames returns the names of the given nodes, preserving input order.
func nodeNames(nodes []v1.Node) []string {
	names := make([]string, len(nodes))
	for i := range nodes {
		names[i] = nodes[i].Name
	}
	return names
}

// podNames returns the names of the given pods, preserving input order.
func podNames(pods []v1.Pod) []string {
	names := make([]string, len(pods))
	for i := range pods {
		names[i] = pods[i].Name
	}
	return names
}

// Tests that recreating all nodes in the test instance group leaves the
// cluster functional: nodes come back Ready and system pods recover.
var _ = Describe("Recreate [Feature:Recreate]", func() {
	f := framework.NewDefaultFramework("recreate")
	var originalNodes []v1.Node
	var originalPodNames []string
	var ps *testutils.PodStore
	systemNamespace := metav1.NamespaceSystem

	BeforeEach(func() {
		// Recreation relies on gcloud managed instance groups.
		framework.SkipUnlessProviderIs("gce", "gke")
		var err error
		numNodes, err := framework.NumberOfRegisteredNodes(f.ClientSet)
		Expect(err).NotTo(HaveOccurred())
		originalNodes, err = framework.CheckNodesReady(f.ClientSet, numNodes, framework.NodeReadyInitialTimeout)
		Expect(err).NotTo(HaveOccurred())

		framework.Logf("Got the following nodes before recreate %v", nodeNames(originalNodes))

		// BUG FIX: the error from NewPodStore was previously dropped; on
		// failure ps would be nil and ps.List() below would panic.
		ps, err = testutils.NewPodStore(f.ClientSet, systemNamespace, labels.Everything(), fields.Everything())
		Expect(err).NotTo(HaveOccurred())

		// Record the restartable system pods so we can verify after the
		// recreate that none of them were permanently lost.
		allPods := ps.List()
		originalPods := framework.FilterNonRestartablePods(allPods)
		originalPodNames = make([]string, len(originalPods))
		for i, p := range originalPods {
			originalPodNames[i] = p.ObjectMeta.Name
		}

		if !framework.CheckPodsRunningReadyOrSucceeded(f.ClientSet, systemNamespace, originalPodNames, framework.PodReadyBeforeTimeout) {
			framework.Failf("At least one pod wasn't running and ready or succeeded at test start.")
		}
	})

	AfterEach(func() {
		if CurrentGinkgoTestDescription().Failed {
			// Make sure that addon/system pods are running, so dump
			// events for the kube-system namespace on failures
			By(fmt.Sprintf("Collecting events from namespace %q.", systemNamespace))
			events, err := f.ClientSet.CoreV1().Events(systemNamespace).List(metav1.ListOptions{})
			Expect(err).NotTo(HaveOccurred())

			for _, e := range events.Items {
				framework.Logf("event for %v: %v %v: %v", e.InvolvedObject.Name, e.Source, e.Reason, e.Message)
			}
		}
		// Stop the pod store's reflector; ps may be nil if BeforeEach
		// failed before creating it.
		if ps != nil {
			ps.Stop()
		}
	})

	It("recreate nodes and ensure they function upon restart", func() {
		testRecreate(f.ClientSet, ps, systemNamespace, originalNodes, originalPodNames)
	})
})

// testRecreate recreates all the nodes in the test instance group, then
// verifies that every node's boot ID changed, that all nodes become Ready
// again, and that the system pods present before the recreate are back to
// running/completed.
func testRecreate(c clientset.Interface, ps *testutils.PodStore, systemNamespace string, nodes []v1.Node, podNames []string) {
	// BUG FIX: the failure messages below previously discarded err,
	// making failed runs undiagnosable from the log alone.
	err := recreateNodes(c, nodes)
	if err != nil {
		framework.Failf("Test failed; failed to start the restart instance group command: %v", err)
	}

	err = waitForNodeBootIdsToChange(c, nodes, framework.RecreateNodeReadyAgainTimeout)
	if err != nil {
		framework.Failf("Test failed; failed to recreate at least one node in %v: %v", framework.RecreateNodeReadyAgainTimeout, err)
	}

	nodesAfter, err := framework.CheckNodesReady(c, len(nodes), framework.RestartNodeReadyAgainTimeout)
	Expect(err).NotTo(HaveOccurred())
	framework.Logf("Got the following nodes after recreate: %v", nodeNames(nodesAfter))

	if len(nodes) != len(nodesAfter) {
		framework.Failf("Had %d nodes before nodes were recreated, but now only have %d",
			len(nodes), len(nodesAfter))
	}

	// Make sure the pods from before node recreation are running/completed.
	// The pod-readiness check reuses whatever time is left of the budget
	// after waiting for the expected number of restartable pods.
	podCheckStart := time.Now()
	podNamesAfter, err := framework.WaitForNRestartablePods(ps, len(podNames), framework.RestartPodReadyAgainTimeout)
	Expect(err).NotTo(HaveOccurred())
	remaining := framework.RestartPodReadyAgainTimeout - time.Since(podCheckStart)
	if !framework.CheckPodsRunningReadyOrSucceeded(c, systemNamespace, podNamesAfter, remaining) {
		framework.Failf("At least one pod wasn't running and ready after the restart.")
	}
}
91 changes: 91 additions & 0 deletions test/e2e/framework/providers/gce/util.go
@@ -0,0 +1,91 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package gce

import (
"fmt"
"strings"
"time"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/kubernetes/test/e2e/framework"
)

// recreateNodes recreates the given nodes in the cluster's managed instance
// group by running `gcloud compute instance-groups managed recreate-instances`
// once per zone. Clusters with more than one managed instance group are not
// supported and produce an error.
func recreateNodes(c clientset.Interface, nodes []v1.Node) error {
	// Build mapping from zone to nodes in that zone.
	nodeNamesByZone := make(map[string][]string)
	for i := range nodes {
		node := &nodes[i]
		// Fall back to the configured zone when the node carries no
		// failure-domain zone label.
		zone := framework.TestContext.CloudConfig.Zone
		if z, ok := node.Labels[v1.LabelZoneFailureDomain]; ok {
			zone = z
		}
		nodeNamesByZone[zone] = append(nodeNamesByZone[zone], node.Name)
	}

	// Find the sole managed instance group name. A comma in the setting
	// means multiple groups were configured, which this test cannot handle.
	instanceGroup := framework.TestContext.CloudConfig.NodeInstanceGroup
	if strings.Contains(instanceGroup, ",") {
		return fmt.Errorf("test does not support cluster setup with more than one managed instance group: %s", instanceGroup)
	}

	// Recreate the nodes, one gcloud invocation per zone.
	for zone, nodeNames := range nodeNamesByZone {
		args := []string{
			"compute",
			fmt.Sprintf("--project=%s", framework.TestContext.CloudConfig.ProjectID),
			"instance-groups",
			"managed",
			"recreate-instances",
			instanceGroup,
			fmt.Sprintf("--instances=%s", strings.Join(nodeNames, ",")),
			fmt.Sprintf("--zone=%s", zone),
		}
		framework.Logf("Recreating instance group %s.", instanceGroup)
		stdout, stderr, err := framework.RunCmd("gcloud", args...)
		if err != nil {
			return fmt.Errorf("error restarting nodes: %s\nstdout: %s\nstderr: %s", err, stdout, stderr)
		}
	}
	return nil
}

// waitForNodeBootIdsToChange polls each node until its boot ID differs from
// the one recorded in nodes (i.e. the underlying VM was actually recreated),
// or the per-node timeout expires. Failures for individual nodes are
// collected so every node is checked; a combined error is returned if any
// node's boot ID did not change.
func waitForNodeBootIdsToChange(c clientset.Interface, nodes []v1.Node, timeout time.Duration) error {
	errMsg := []string{}
	for i := range nodes {
		node := &nodes[i]
		if err := wait.Poll(30*time.Second, timeout, func() (bool, error) {
			newNode, err := c.CoreV1().Nodes().Get(node.Name, metav1.GetOptions{})
			if err != nil {
				// Transient API errors are tolerated; keep polling.
				framework.Logf("Could not get node info: %s. Retrying in %v.", err, 30*time.Second)
				return false, nil
			}
			return node.Status.NodeInfo.BootID != newNode.Status.NodeInfo.BootID, nil
		}); err != nil {
			// BUG FIX: the original passed the format string and its
			// arguments as three separate elements to append, storing the
			// raw "%s" template (plus stray entries) in the slice. Format
			// the message with Sprintf instead.
			errMsg = append(errMsg, fmt.Sprintf("error waiting for node %s boot ID to change: %s", node.Name, err.Error()))
		}
	}
	if len(errMsg) > 0 {
		// BUG FIX: fmt.Errorf with a non-constant format string trips
		// go vet and misinterprets any '%' in the joined messages.
		return fmt.Errorf("%s", strings.Join(errMsg, ","))
	}
	return nil
}
54 changes: 54 additions & 0 deletions test/e2e/framework/util.go
Expand Up @@ -85,6 +85,7 @@ import (
nodectlr "k8s.io/kubernetes/pkg/controller/nodelifecycle"
"k8s.io/kubernetes/pkg/controller/service"
"k8s.io/kubernetes/pkg/features"
kubepod "k8s.io/kubernetes/pkg/kubelet/pod"
"k8s.io/kubernetes/pkg/kubelet/util/format"
"k8s.io/kubernetes/pkg/master/ports"
"k8s.io/kubernetes/pkg/scheduler/algorithm/predicates"
Expand Down Expand Up @@ -177,6 +178,10 @@ const (
// How long PVs have to become deleted
PVDeletingTimeout = 3 * time.Minute

// How long a node is allowed to become "Ready" after it is recreated before
// the test is considered failed.
RecreateNodeReadyAgainTimeout = 10 * time.Minute

// How long a node is allowed to become "Ready" after it is restarted before
// the test is considered failed.
RestartNodeReadyAgainTimeout = 5 * time.Minute
Expand Down Expand Up @@ -3304,6 +3309,55 @@ func WaitForPodsReady(c clientset.Interface, ns, name string, minReadySeconds in
})
}

// WaitForNRestartablePods tries to list restartable pods using ps until it
// finds expect of them, returning their names if it can do so before timeout.
// On timeout it returns the names found so far together with the last
// mismatch error observed.
func WaitForNRestartablePods(ps *testutils.PodStore, expect int, timeout time.Duration) ([]string, error) {
	var pods []*v1.Pod
	var errLast error
	// Poll the store until exactly `expect` restartable pods are listed;
	// `found` is true only if that happened before the timeout.
	found := wait.Poll(Poll, timeout, func() (bool, error) {
		allPods := ps.List()
		pods = FilterNonRestartablePods(allPods)
		if len(pods) != expect {
			errLast = fmt.Errorf("expected to find %d pods but found only %d", expect, len(pods))
			Logf("Error getting pods: %v", errLast)
			return false, nil
		}
		return true, nil
	}) == nil
	podNames := make([]string, len(pods))
	for i, p := range pods {
		podNames[i] = p.ObjectMeta.Name
	}
	if !found {
		return podNames, fmt.Errorf("couldn't find %d pods within %v; last error: %v",
			expect, timeout, errLast)
	}
	return podNames, nil
}

// FilterNonRestartablePods filters out pods that will never get recreated if
// deleted after termination, returning only the pods expected to survive a
// node restart/recreate.
func FilterNonRestartablePods(pods []*v1.Pod) []*v1.Pod {
	var results []*v1.Pod
	for _, p := range pods {
		if isNotRestartAlwaysMirrorPod(p) {
			// Mirror pods with restart policy == Never will not get
			// recreated if they are deleted after the pods have
			// terminated. For now, we discount such pods.
			// https://github.com/kubernetes/kubernetes/issues/34003
			continue
		}
		results = append(results, p)
	}
	return results
}

// isNotRestartAlwaysMirrorPod reports whether p is a mirror pod whose
// restart policy is anything other than Always.
func isNotRestartAlwaysMirrorPod(p *v1.Pod) bool {
	return kubepod.IsMirrorPod(p) && p.Spec.RestartPolicy != v1.RestartPolicyAlways
}

// Waits for the number of events on the given object to reach a desired count.
func WaitForEvents(c clientset.Interface, ns string, objOrRef runtime.Object, desiredEventsCount int) error {
return wait.Poll(Poll, 5*time.Minute, func() (bool, error) {
Expand Down
2 changes: 0 additions & 2 deletions test/e2e/lifecycle/BUILD
Expand Up @@ -21,15 +21,13 @@ go_library(
importpath = "k8s.io/kubernetes/test/e2e/lifecycle",
deps = [
"//pkg/apis/core:go_default_library",
"//pkg/kubelet/pod:go_default_library",
"//pkg/master/ports:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/fields:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/labels:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/sets:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/version:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/util/wait:go_default_library",
"//staging/src/k8s.io/client-go/discovery:go_default_library",
"//staging/src/k8s.io/client-go/kubernetes:go_default_library",
"//test/e2e/chaosmonkey:go_default_library",
Expand Down

0 comments on commit 8175579

Please sign in to comment.