From 6f203d6463a0cae450f7e0cf65767966ada193cb Mon Sep 17 00:00:00 2001 From: Zhen Wang Date: Thu, 7 Feb 2019 15:40:31 -0800 Subject: [PATCH 1/2] allows configuring NPD release and flags on GCI and add cluster e2e test --- cluster/gce/config-default.sh | 2 + cluster/gce/config-test.sh | 2 + cluster/gce/gci/configure-helper.sh | 30 +-- cluster/gce/gci/configure.sh | 6 +- cluster/gce/util.sh | 2 + test/e2e/framework/kubelet_stats.go | 6 +- test/e2e/node/BUILD | 2 + test/e2e/node/node_problem_detector.go | 282 +++++++++++++++++++++++++ 8 files changed, 313 insertions(+), 19 deletions(-) create mode 100644 test/e2e/node/node_problem_detector.go diff --git a/cluster/gce/config-default.sh b/cluster/gce/config-default.sh index da8e119c9e52..7c8003c60861 100755 --- a/cluster/gce/config-default.sh +++ b/cluster/gce/config-default.sh @@ -284,6 +284,8 @@ else fi NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}" NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}" +NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}" +NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}" # Optional: Create autoscaler for cluster's nodes. ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}" diff --git a/cluster/gce/config-test.sh b/cluster/gce/config-test.sh index fcbb9597e8e1..debc10afddc5 100755 --- a/cluster/gce/config-test.sh +++ b/cluster/gce/config-test.sh @@ -291,6 +291,8 @@ else fi NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}" NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}" +NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}" +NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}" # Optional: Create autoscaler for cluster's nodes. 
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}" diff --git a/cluster/gce/gci/configure-helper.sh b/cluster/gce/gci/configure-helper.sh index 11feaeeaaa38..ee66d1e74912 100644 --- a/cluster/gce/gci/configure-helper.sh +++ b/cluster/gce/gci/configure-helper.sh @@ -1197,21 +1197,25 @@ EOF function start-node-problem-detector { echo "Start node problem detector" local -r npd_bin="${KUBE_HOME}/bin/node-problem-detector" - local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json" - # TODO(random-liu): Handle this for alternative container runtime. - local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json" - local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json" echo "Using node problem detector binary at ${npd_bin}" - local flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}" - flags+=" --logtostderr" - flags+=" --system-log-monitors=${km_config},${dm_config}" - flags+=" --custom-plugin-monitors=${custom_km_config}" - flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig" - local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256} - flags+=" --port=${npd_port}" - if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then - flags+=" ${EXTRA_NPD_ARGS}" + + local flags="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}" + if [[ -z "${flags}" ]]; then + local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json" + # TODO(random-liu): Handle this for alternative container runtime. 
+ local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json" + local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json" + flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}" + flags+=" --logtostderr" + flags+=" --system-log-monitors=${km_config},${dm_config}" + flags+=" --custom-plugin-monitors=${custom_km_config}" + local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256} + flags+=" --port=${npd_port}" + if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then + flags+=" ${EXTRA_NPD_ARGS}" + fi fi + flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig" # Write the systemd service file for node problem detector. cat </etc/systemd/system/node-problem-detector.service diff --git a/cluster/gce/gci/configure.sh b/cluster/gce/gci/configure.sh index 82b9fe3d1ac1..6a40a6894ebe 100644 --- a/cluster/gce/gci/configure.sh +++ b/cluster/gce/gci/configure.sh @@ -202,12 +202,12 @@ function install-node-problem-detector { local -r npd_tar="node-problem-detector-${npd_version}.tar.gz" if is-preloaded "${npd_tar}" "${npd_sha1}"; then - echo "node-problem-detector is preloaded." + echo "${npd_tar} is preloaded." return fi - echo "Downloading node problem detector." - local -r npd_release_path="https://storage.googleapis.com/kubernetes-release" + echo "Downloading ${npd_tar}." 
+ local -r npd_release_path="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-https://storage.googleapis.com/kubernetes-release}" download-or-bust "${npd_sha1}" "${npd_release_path}/node-problem-detector/${npd_tar}" local -r npd_dir="${KUBE_HOME}/node-problem-detector" mkdir -p "${npd_dir}" diff --git a/cluster/gce/util.sh b/cluster/gce/util.sh index 5ca120491ab6..e6d77410db47 100755 --- a/cluster/gce/util.sh +++ b/cluster/gce/util.sh @@ -829,6 +829,8 @@ ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false}) ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-none}) NODE_PROBLEM_DETECTOR_VERSION: $(yaml-quote ${NODE_PROBLEM_DETECTOR_VERSION:-}) NODE_PROBLEM_DETECTOR_TAR_HASH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_TAR_HASH:-}) +NODE_PROBLEM_DETECTOR_RELEASE_PATH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}) +NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS: $(yaml-quote ${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}) ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false}) ENABLE_RESCHEDULER: $(yaml-quote ${ENABLE_RESCHEDULER:-false}) LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-}) diff --git a/test/e2e/framework/kubelet_stats.go b/test/e2e/framework/kubelet_stats.go index 1304cbfcf599..da077b3e220c 100644 --- a/test/e2e/framework/kubelet_stats.go +++ b/test/e2e/framework/kubelet_stats.go @@ -281,8 +281,8 @@ func HighLatencyKubeletOperations(c clientset.Interface, threshold time.Duration return badMetrics, nil } -// getStatsSummary contacts kubelet for the container information. -func getStatsSummary(c clientset.Interface, nodeName string) (*stats.Summary, error) { +// GetStatsSummary contacts kubelet for the container information. 
+func GetStatsSummary(c clientset.Interface, nodeName string) (*stats.Summary, error) { ctx, cancel := context.WithTimeout(context.Background(), SingleCallTimeout) defer cancel() @@ -348,7 +348,7 @@ func getOneTimeResourceUsageOnNode( return nil, fmt.Errorf("numStats needs to be > 1 and < %d", maxNumStatsToRequest) } // Get information of all containers on the node. - summary, err := getStatsSummary(c, nodeName) + summary, err := GetStatsSummary(c, nodeName) if err != nil { return nil, err } diff --git a/test/e2e/node/BUILD b/test/e2e/node/BUILD index 88305fc4320e..878d69e1999d 100644 --- a/test/e2e/node/BUILD +++ b/test/e2e/node/BUILD @@ -9,6 +9,7 @@ go_library( "kubelet.go", "kubelet_perf.go", "mount_propagation.go", + "node_problem_detector.go", "pod_gc.go", "pods.go", "pre_stop.go", @@ -18,6 +19,7 @@ go_library( importpath = "k8s.io/kubernetes/test/e2e/node", visibility = ["//visibility:public"], deps = [ + "//pkg/api/v1/node:go_default_library", "//pkg/kubelet/apis/stats/v1alpha1:go_default_library", "//test/e2e/common:go_default_library", "//test/e2e/framework:go_default_library", diff --git a/test/e2e/node/node_problem_detector.go b/test/e2e/node/node_problem_detector.go new file mode 100644 index 000000000000..3f3289421be2 --- /dev/null +++ b/test/e2e/node/node_problem_detector.go @@ -0,0 +1,282 @@ +/* +Copyright 2019 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package node + +import ( + "fmt" + "net" + "sort" + "strconv" + "strings" + "time" + + "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" + nodeutil "k8s.io/kubernetes/pkg/api/v1/node" + "k8s.io/kubernetes/test/e2e/framework" + + . "github.com/onsi/ginkgo" + . "github.com/onsi/gomega" +) + +// This test checks if node-problem-detector (NPD) runs fine without error on +// the nodes in the cluster. NPD's functionality is tested in e2e_node tests. +var _ = SIGDescribe("NodeProblemDetector", func() { + const ( + pollInterval = 1 * time.Second + pollTimeout = 1 * time.Minute + ) + f := framework.NewDefaultFramework("node-problem-detector") + + BeforeEach(func() { + framework.SkipUnlessSSHKeyPresent() + framework.SkipUnlessProviderIs(framework.ProvidersWithSSH...) + framework.SkipUnlessProviderIs("gce", "gke") + framework.SkipUnlessNodeOSDistroIs("gci", "ubuntu") + framework.WaitForAllNodesHealthy(f.ClientSet, time.Minute) + }) + + It("should run without error", func() { + By("Getting all nodes and their SSH-able IP addresses") + nodes := framework.GetReadySchedulableNodesOrDie(f.ClientSet) + Expect(len(nodes.Items)).NotTo(BeZero()) + hosts := []string{} + for _, node := range nodes.Items { + for _, addr := range node.Status.Addresses { + if addr.Type == v1.NodeExternalIP { + hosts = append(hosts, net.JoinHostPort(addr.Address, "22")) + break + } + } + } + Expect(len(hosts)).To(Equal(len(nodes.Items))) + + isStandaloneMode := make(map[string]bool) + cpuUsageStats := make(map[string][]float64) + uptimeStats := make(map[string][]float64) + rssStats := make(map[string][]float64) + workingSetStats := make(map[string][]float64) + + for _, host := range hosts { + cpuUsageStats[host] = []float64{} + uptimeStats[host] = []float64{} + rssStats[host] = []float64{} + workingSetStats[host] = []float64{} + + cmd := "systemctl status node-problem-detector.service" + result, err := framework.SSH(cmd, host, 
framework.TestContext.Provider) + isStandaloneMode[host] = (err == nil && result.Code == 0) + + By(fmt.Sprintf("Check node %q has node-problem-detector process", host)) + // Using brackets "[n]" is a trick to prevent grep command itself from + // showing up, because string text "[n]ode-problem-detector" does not + // match regular expression "[n]ode-problem-detector". + psCmd := "ps aux | grep [n]ode-problem-detector" + result, err = framework.SSH(psCmd, host, framework.TestContext.Provider) + framework.ExpectNoError(err) + Expect(result.Code).To(BeZero()) + Expect(result.Stdout).To(ContainSubstring("node-problem-detector")) + + By(fmt.Sprintf("Check node-problem-detector is running fine on node %q", host)) + journalctlCmd := "sudo journalctl -u node-problem-detector" + result, err = framework.SSH(journalctlCmd, host, framework.TestContext.Provider) + framework.ExpectNoError(err) + Expect(result.Code).To(BeZero()) + Expect(result.Stdout).NotTo(ContainSubstring("node-problem-detector.service: Failed")) + + if isStandaloneMode[host] { + cpuUsage, uptime := getCpuStat(f, host) + cpuUsageStats[host] = append(cpuUsageStats[host], cpuUsage) + uptimeStats[host] = append(uptimeStats[host], uptime) + } + + By(fmt.Sprintf("Inject log to trigger AUFSUmountHung on node %q", host)) + log := "INFO: task umount.aufs:21568 blocked for more than 120 seconds." 
+			injectLogCmd := "sudo sh -c \"echo 'kernel: " + log + "' >> /dev/kmsg\""
+			// Keep the SSH result of the injection itself: the exit-code assertion
+			// below must check this command, not the earlier journalctl call.
+			result, err = framework.SSH(injectLogCmd, host, framework.TestContext.Provider)
+			framework.ExpectNoError(err)
+			Expect(result.Code).To(BeZero())
+		}
+
+		By("Check node-problem-detector can post conditions and events to API server")
+		for _, node := range nodes.Items {
+			By(fmt.Sprintf("Check node-problem-detector posted KernelDeadlock condition on node %q", node.Name))
+			Eventually(func() error {
+				return verifyNodeCondition(f, "KernelDeadlock", v1.ConditionTrue, "AUFSUmountHung", node.Name)
+			}, pollTimeout, pollInterval).Should(Succeed())
+
+			By(fmt.Sprintf("Check node-problem-detector posted AUFSUmountHung event on node %q", node.Name))
+			eventListOptions := metav1.ListOptions{FieldSelector: fields.Set{"involvedObject.kind": "Node"}.AsSelector().String()}
+			Eventually(func() error {
+				return verifyEvents(f, eventListOptions, 1, "AUFSUmountHung", node.Name)
+			}, pollTimeout, pollInterval).Should(Succeed())
+		}
+
+		By("Gather node-problem-detector cpu and memory stats")
+		numIterations := 60
+		for i := 1; i <= numIterations; i++ {
+			for j, host := range hosts {
+				if isStandaloneMode[host] {
+					rss, workingSet := getMemoryStat(f, host)
+					rssStats[host] = append(rssStats[host], rss)
+					workingSetStats[host] = append(workingSetStats[host], workingSet)
+					if i == numIterations {
+						cpuUsage, uptime := getCpuStat(f, host)
+						cpuUsageStats[host] = append(cpuUsageStats[host], cpuUsage)
+						uptimeStats[host] = append(uptimeStats[host], uptime)
+					}
+				} else {
+					cpuUsage, rss, workingSet := getNpdPodStat(f, nodes.Items[j].Name)
+					cpuUsageStats[host] = append(cpuUsageStats[host], cpuUsage)
+					rssStats[host] = append(rssStats[host], rss)
+					workingSetStats[host] = append(workingSetStats[host], workingSet)
+				}
+			}
+			time.Sleep(time.Second)
+		}
+
+		cpuStatsMsg := "CPU (core):"
+		rssStatsMsg := "RSS (MB):"
+		workingSetStatsMsg := "WorkingSet (MB):"
+		for i, host := range hosts {
+			if isStandaloneMode[host] {
+				// 
When in standalone mode, NPD is running as systemd service. We + // calculate its cpu usage from cgroup cpuacct value differences. + cpuUsage := cpuUsageStats[host][1] - cpuUsageStats[host][0] + totaltime := uptimeStats[host][1] - uptimeStats[host][0] + cpuStatsMsg += fmt.Sprintf(" %s[%.3f];", nodes.Items[i].Name, cpuUsage/totaltime) + } else { + sort.Float64s(cpuUsageStats[host]) + cpuStatsMsg += fmt.Sprintf(" %s[%.3f|%.3f|%.3f];", nodes.Items[i].Name, + cpuUsageStats[host][0], cpuUsageStats[host][len(cpuUsageStats[host])/2], cpuUsageStats[host][len(cpuUsageStats[host])-1]) + } + + sort.Float64s(rssStats[host]) + rssStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes.Items[i].Name, + rssStats[host][0], rssStats[host][len(rssStats[host])/2], rssStats[host][len(rssStats[host])-1]) + + sort.Float64s(workingSetStats[host]) + workingSetStatsMsg += fmt.Sprintf(" %s[%.1f|%.1f|%.1f];", nodes.Items[i].Name, + workingSetStats[host][0], workingSetStats[host][len(workingSetStats[host])/2], workingSetStats[host][len(workingSetStats[host])-1]) + } + framework.Logf("Node-Problem-Detector CPU and Memory Stats:\n\t%s\n\t%s\n\t%s", cpuStatsMsg, rssStatsMsg, workingSetStatsMsg) + }) +}) + +func verifyEvents(f *framework.Framework, options metav1.ListOptions, num int, reason, nodeName string) error { + events, err := f.ClientSet.CoreV1().Events(metav1.NamespaceDefault).List(options) + if err != nil { + return err + } + count := 0 + for _, event := range events.Items { + if event.Reason != reason || event.Source.Host != nodeName { + continue + } + count += int(event.Count) + } + if count != num { + return fmt.Errorf("expect event number %d, got %d: %v", num, count, events.Items) + } + return nil +} + +func verifyNodeCondition(f *framework.Framework, condition v1.NodeConditionType, status v1.ConditionStatus, reason, nodeName string) error { + node, err := f.ClientSet.CoreV1().Nodes().Get(nodeName, metav1.GetOptions{}) + if err != nil { + return err + } + _, c := 
nodeutil.GetNodeCondition(&node.Status, condition) + if c == nil { + return fmt.Errorf("node condition %q not found", condition) + } + if c.Status != status || c.Reason != reason { + return fmt.Errorf("unexpected node condition %q: %+v", condition, c) + } + return nil +} + +func getMemoryStat(f *framework.Framework, host string) (rss, workingSet float64) { + memCmd := "cat /sys/fs/cgroup/memory/system.slice/node-problem-detector.service/memory.usage_in_bytes && cat /sys/fs/cgroup/memory/system.slice/node-problem-detector.service/memory.stat" + result, err := framework.SSH(memCmd, host, framework.TestContext.Provider) + framework.ExpectNoError(err) + Expect(result.Code).To(BeZero()) + lines := strings.Split(result.Stdout, "\n") + + memoryUsage, err := strconv.ParseFloat(lines[0], 64) + Expect(err).To(BeNil()) + + var totalInactiveFile float64 + for _, line := range lines[1:] { + tokens := strings.Split(line, " ") + if tokens[0] == "total_rss" { + rss, err = strconv.ParseFloat(tokens[1], 64) + Expect(err).To(BeNil()) + } + if tokens[0] == "total_inactive_file" { + totalInactiveFile, err = strconv.ParseFloat(tokens[1], 64) + Expect(err).To(BeNil()) + } + } + + workingSet = memoryUsage + if workingSet < totalInactiveFile { + workingSet = 0 + } else { + workingSet -= totalInactiveFile + } + + // Convert to MB + rss = rss / 1024 / 1024 + workingSet = workingSet / 1024 / 1024 + return +} + +func getCpuStat(f *framework.Framework, host string) (usage, uptime float64) { + cpuCmd := "cat /sys/fs/cgroup/cpu/system.slice/node-problem-detector.service/cpuacct.usage && cat /proc/uptime | awk '{print $1}'" + result, err := framework.SSH(cpuCmd, host, framework.TestContext.Provider) + framework.ExpectNoError(err) + Expect(result.Code).To(BeZero()) + lines := strings.Split(result.Stdout, "\n") + + usage, err = strconv.ParseFloat(lines[0], 64) + uptime, err = strconv.ParseFloat(lines[1], 64) + + // Convert from nanoseconds to seconds + usage *= 1e-9 + return +} + +func 
getNpdPodStat(f *framework.Framework, nodeName string) (cpuUsage, rss, workingSet float64) { + summary, err := framework.GetStatsSummary(f.ClientSet, nodeName) + framework.ExpectNoError(err) + + hasNpdPod := false + for _, pod := range summary.Pods { + if !strings.HasPrefix(pod.PodRef.Name, "npd") { + continue + } + cpuUsage = float64(*pod.CPU.UsageNanoCores) * 1e-9 + rss = float64(*pod.Memory.RSSBytes) / 1024 / 1024 + workingSet = float64(*pod.Memory.WorkingSetBytes) / 1024 / 1024 + hasNpdPod = true + break + } + Expect(hasNpdPod).To(BeTrue()) + return +} From ffa6f476c4e96343a3bfbc8e33b3d84bef3e04af Mon Sep 17 00:00:00 2001 From: Zhen Wang Date: Thu, 7 Feb 2019 15:41:16 -0800 Subject: [PATCH 2/2] allows configuring NPD image version in node e2e test and fix the test --- hack/make-rules/test-e2e-node.sh | 7 +-- test/e2e/framework/test_context.go | 3 ++ test/e2e_node/conformance/build/Dockerfile | 5 +- test/e2e_node/e2e_node_suite_test.go | 23 ++++++++- test/e2e_node/image_list.go | 19 ++++++- .../conformance/conformance-jenkins.sh | 3 +- test/e2e_node/jenkins/e2e-node-jenkins.sh | 3 +- test/e2e_node/node_problem_detector_linux.go | 51 ++++++++++++------- test/e2e_node/remote/cadvisor_e2e.go | 2 +- test/e2e_node/remote/node_conformance.go | 6 +-- test/e2e_node/remote/node_e2e.go | 6 +-- test/e2e_node/remote/remote.go | 4 +- test/e2e_node/remote/types.go | 3 +- test/e2e_node/runner/local/run_local.go | 3 +- test/e2e_node/runner/remote/run_remote.go | 3 +- 15 files changed, 103 insertions(+), 38 deletions(-) diff --git a/hack/make-rules/test-e2e-node.sh b/hack/make-rules/test-e2e-node.sh index 2e5c95ae2646..451486ca39c8 100755 --- a/hack/make-rules/test-e2e-node.sh +++ b/hack/make-rules/test-e2e-node.sh @@ -34,6 +34,7 @@ image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""} run_until_failure=${RUN_UNTIL_FAILURE:-"false"} test_args=${TEST_ARGS:-""} system_spec_name=${SYSTEM_SPEC_NAME:-} +extra_envs=${EXTRA_ENVS:-} # Parse the flags to pass to ginkgo ginkgoflags="" @@ 
-148,7 +149,7 @@ if [ $remote = true ] ; then --image-project="$image_project" --instance-name-prefix="$instance_prefix" \ --delete-instances="$delete_instances" --test_args="$test_args" --instance-metadata="$metadata" \ --image-config-file="$image_config_file" --system-spec-name="$system_spec_name" \ - --test-suite="$test_suite" \ + --extra-envs="$extra_envs" --test-suite="$test_suite" \ 2>&1 | tee -i "${artifacts}/build-log.txt" exit $? @@ -169,8 +170,8 @@ else # Test using the host the script was run on # Provided for backwards compatibility go run test/e2e_node/runner/local/run_local.go \ - --system-spec-name="$system_spec_name" --ginkgo-flags="$ginkgoflags" \ - --test-flags="--container-runtime=${runtime} \ + --system-spec-name="$system_spec_name" --extra-envs="$extra_envs" \ + --ginkgo-flags="$ginkgoflags" --test-flags="--container-runtime=${runtime} \ --alsologtostderr --v 4 --report-dir=${artifacts} --node-name $(hostname) \ $test_args" --build-dependencies=true 2>&1 | tee -i "${artifacts}/build-log.txt" exit $? diff --git a/test/e2e/framework/test_context.go b/test/e2e/framework/test_context.go index 71b6aab09f3a..81df49d707b9 100644 --- a/test/e2e/framework/test_context.go +++ b/test/e2e/framework/test_context.go @@ -160,6 +160,8 @@ type NodeTestContextType struct { // the node e2e test. If empty, the default one (system.DefaultSpec) is // used. The system specs are in test/e2e_node/system/specs/. SystemSpecName string + // ExtraEnvs is a map of environment names to values. + ExtraEnvs map[string]string } // StorageConfig contains the shared settings for storage 2e2 tests. 
@@ -301,6 +303,7 @@ func RegisterNodeFlags() { flag.BoolVar(&TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.") flag.StringVar(&TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.") flag.StringVar(&TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.") + flag.Var(utilflag.NewMapStringString(&TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2") } func RegisterStorageFlags() { diff --git a/test/e2e_node/conformance/build/Dockerfile b/test/e2e_node/conformance/build/Dockerfile index 5783726a08b3..288649683f20 100644 --- a/test/e2e_node/conformance/build/Dockerfile +++ b/test/e2e_node/conformance/build/Dockerfile @@ -27,12 +27,14 @@ COPY_SYSTEM_SPEC_FILE # REPORT_PATH is the path in the container to save test result and logs. # FLAKE_ATTEMPTS is the time to retry when there is a test failure. By default 2. # TEST_ARGS is the test arguments passed into the test. +# EXTRA_ENVS is the extra environment variables needed for node e2e tests. ENV FOCUS="\[Conformance\]" \ SKIP="\[Flaky\]|\[Serial\]" \ PARALLELISM=8 \ REPORT_PATH="/var/result" \ FLAKE_ATTEMPTS=2 \ - TEST_ARGS="" + TEST_ARGS="" \ + EXTRA_ENVS="" ENTRYPOINT ginkgo --focus="$FOCUS" \ --skip="$SKIP" \ @@ -46,4 +48,5 @@ ENTRYPOINT ginkgo --focus="$FOCUS" \ --system-spec-name=SYSTEM_SPEC_NAME \ # This is a placeholder that will be substituted in the Makefile. 
--system-spec-file=SYSTEM_SPEC_FILE_PATH \ + --extra-envs=$EXTRA_ENVS \ $TEST_ARGS diff --git a/test/e2e_node/e2e_node_suite_test.go b/test/e2e_node/e2e_node_suite_test.go index 7b017401f037..6d920e2a16f0 100644 --- a/test/e2e_node/e2e_node_suite_test.go +++ b/test/e2e_node/e2e_node_suite_test.go @@ -76,6 +76,7 @@ func init() { func TestMain(m *testing.M) { pflag.Parse() framework.AfterReadingAllFlags(&framework.TestContext) + setExtraEnvs() os.Exit(m.Run()) } @@ -146,6 +147,7 @@ var _ = SynchronizedBeforeSuite(func() []byte { // This helps with debugging test flakes since it is hard to tell when a test failure is due to image pulling. if framework.TestContext.PrepullImages { glog.Infof("Pre-pulling images so that they are cached for the tests.") + updateImageWhiteList() err := PrePullAllImages() Expect(err).ShouldNot(HaveOccurred()) } @@ -244,6 +246,9 @@ func waitForNodeReady() { // TODO(random-liu): Using dynamic kubelet configuration feature to // update test context with node configuration. func updateTestContext() error { + setExtraEnvs() + updateImageWhiteList() + client, err := getAPIServerClient() if err != nil { return fmt.Errorf("failed to get apiserver client: %v", err) @@ -261,7 +266,7 @@ func updateTestContext() error { if err != nil { return fmt.Errorf("failed to get kubelet configuration: %v", err) } - framework.TestContext.KubeletConfig = *kubeletCfg // Set kubelet config. + framework.TestContext.KubeletConfig = *kubeletCfg // Set kubelet config return nil } @@ -309,3 +314,19 @@ func loadSystemSpecFromFile(filename string) (*system.SysSpec, error) { } return spec, nil } + +// isNodeReady returns true if a node is ready; false otherwise. 
+func isNodeReady(node *v1.Node) bool { + for _, c := range node.Status.Conditions { + if c.Type == v1.NodeReady { + return c.Status == v1.ConditionTrue + } + } + return false +} + +func setExtraEnvs() { + for name, value := range framework.TestContext.ExtraEnvs { + os.Setenv(name, value) + } +} diff --git a/test/e2e_node/image_list.go b/test/e2e_node/image_list.go index d35bb5acaae2..a11b902142c1 100644 --- a/test/e2e_node/image_list.go +++ b/test/e2e_node/image_list.go @@ -18,6 +18,7 @@ package e2e_node import ( "fmt" + "os" "os/exec" "os/user" "time" @@ -46,7 +47,6 @@ var NodeImageWhiteList = sets.NewString( "k8s.gcr.io/stress:v1", busyboxImage, "k8s.gcr.io/busybox@sha256:4bdd623e848417d96127e16037743f0cd8b528c026e9175e22a84f639eca58ff", - "k8s.gcr.io/node-problem-detector:v0.4.1", imageutils.GetE2EImage(imageutils.NginxSlim), imageutils.GetE2EImage(imageutils.ServeHostname), imageutils.GetE2EImage(imageutils.Netexec), @@ -55,9 +55,24 @@ var NodeImageWhiteList = sets.NewString( framework.GetGPUDevicePluginImage(), ) -func init() { +// updateImageWhiteList updates the framework.ImageWhiteList with +// 1. the hard coded lists +// 2. the ones passed in from framework.TestContext.ExtraEnvs +// So this function needs to be called after the extra envs are applied. +func updateImageWhiteList() { // Union NodeImageWhiteList and CommonImageWhiteList into the framework image white list. 
framework.ImageWhiteList = NodeImageWhiteList.Union(commontest.CommonImageWhiteList) + // Images from extra envs + framework.ImageWhiteList.Insert(getNodeProblemDetectorImage()) +} + +func getNodeProblemDetectorImage() string { + const defaultImage string = "k8s.gcr.io/node-problem-detector:v0.6.2" + image := os.Getenv("NODE_PROBLEM_DETECTOR_IMAGE") + if image == "" { + image = defaultImage + } + return image } // puller represents a generic image puller diff --git a/test/e2e_node/jenkins/conformance/conformance-jenkins.sh b/test/e2e_node/jenkins/conformance/conformance-jenkins.sh index 9e8715287cf1..7758d0b2df66 100755 --- a/test/e2e_node/jenkins/conformance/conformance-jenkins.sh +++ b/test/e2e_node/jenkins/conformance/conformance-jenkins.sh @@ -40,4 +40,5 @@ go run test/e2e_node/runner/remote/run_remote.go --test-suite=conformance \ --results-dir="$ARTIFACTS" --test-timeout="$TIMEOUT" \ --test_args="--kubelet-flags=\"$KUBELET_ARGS\"" \ --instance-metadata="$GCE_INSTANCE_METADATA" \ - --system-spec-name="$SYSTEM_SPEC_NAME" + --system-spec-name="$SYSTEM_SPEC_NAME" \ + --extra-envs="$EXTRA_ENVS" diff --git a/test/e2e_node/jenkins/e2e-node-jenkins.sh b/test/e2e_node/jenkins/e2e-node-jenkins.sh index a1caae4ad95f..99a4ac14bc38 100755 --- a/test/e2e_node/jenkins/e2e-node-jenkins.sh +++ b/test/e2e_node/jenkins/e2e-node-jenkins.sh @@ -47,4 +47,5 @@ go run test/e2e_node/runner/remote/run_remote.go --logtostderr --vmodule=*=4 \ --image-config-file="$GCE_IMAGE_CONFIG_PATH" --cleanup="$CLEANUP" \ --results-dir="$ARTIFACTS" --ginkgo-flags="--nodes=$PARALLELISM $GINKGO_FLAGS" \ --test-timeout="$TIMEOUT" --test_args="$TEST_ARGS --kubelet-flags=\"$KUBELET_ARGS\"" \ - --instance-metadata="$GCE_INSTANCE_METADATA" --system-spec-name="$SYSTEM_SPEC_NAME" + --instance-metadata="$GCE_INSTANCE_METADATA" --system-spec-name="$SYSTEM_SPEC_NAME" \ + --extra-envs="$EXTRA_ENVS" diff --git a/test/e2e_node/node_problem_detector_linux.go b/test/e2e_node/node_problem_detector_linux.go index 
36a63193c9e2..a6a248abcf97 100644 --- a/test/e2e_node/node_problem_detector_linux.go +++ b/test/e2e_node/node_problem_detector_linux.go @@ -45,13 +45,14 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete pollInterval = 1 * time.Second pollConsistent = 5 * time.Second pollTimeout = 1 * time.Minute - image = "k8s.gcr.io/node-problem-detector:v0.4.1" ) f := framework.NewDefaultFramework("node-problem-detector") var c clientset.Interface var uid string var ns, name, configName, eventNamespace string var bootTime, nodeTime time.Time + var image string + BeforeEach(func() { c = f.ClientSet ns = f.Namespace.Name @@ -60,6 +61,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete configName = "node-problem-detector-config-" + uid // There is no namespace for Node, event recorder will set default namespace for node events. eventNamespace = metav1.NamespaceDefault + image = getNodeProblemDetectorImage() + By(fmt.Sprintf("Using node-problem-detector image: %s", image)) }) // Test system log monitor. We may add other tests if we have more problem daemons in the future. 
@@ -245,7 +248,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete timestamp time.Time message string messageNum int - events int + tempEvents int // Events for temp errors + totalEvents int // Events for both temp errors and condition changes conditionReason string conditionMessage string conditionType v1.ConditionStatus @@ -279,7 +283,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete timestamp: nodeTime, message: tempMessage, messageNum: 3, - events: 3, + tempEvents: 3, + totalEvents: 3, conditionReason: defaultReason, conditionMessage: defaultMessage, conditionType: v1.ConditionFalse, @@ -289,7 +294,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete timestamp: nodeTime, message: permMessage1, messageNum: 1, - events: 3, // event number should not change + tempEvents: 3, // event number for temp errors should not change + totalEvents: 4, // add 1 event for condition change conditionReason: permReason1, conditionMessage: permMessage1, conditionType: v1.ConditionTrue, @@ -299,7 +305,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete timestamp: nodeTime.Add(5 * time.Minute), message: tempMessage, messageNum: 3, - events: 6, + tempEvents: 6, // add 3 events for temp errors + totalEvents: 7, // add 3 events for temp errors conditionReason: permReason1, conditionMessage: permMessage1, conditionType: v1.ConditionTrue, @@ -309,7 +316,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete timestamp: nodeTime.Add(5 * time.Minute), message: permMessage1 + "different message", messageNum: 1, - events: 6, // event number should not change + tempEvents: 6, // event number should not change + totalEvents: 7, // event number should not change conditionReason: permReason1, conditionMessage: permMessage1, conditionType: v1.ConditionTrue, @@ -319,7 +327,8 @@ var _ = framework.KubeDescribe("NodeProblemDetector 
[NodeFeature:NodeProblemDete timestamp: nodeTime.Add(5 * time.Minute), message: permMessage2, messageNum: 1, - events: 6, // event number should not change + tempEvents: 6, // event number for temp errors should not change + totalEvents: 8, // add 1 event for condition change conditionReason: permReason2, conditionMessage: permMessage2, conditionType: v1.ConditionTrue, @@ -332,13 +341,17 @@ var _ = framework.KubeDescribe("NodeProblemDetector [NodeFeature:NodeProblemDete Expect(err).NotTo(HaveOccurred()) } - By(fmt.Sprintf("Wait for %d events generated", test.events)) + By(fmt.Sprintf("Wait for %d temp events generated", test.tempEvents)) + Eventually(func() error { + return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.tempEvents, tempReason, tempMessage) + }, pollTimeout, pollInterval).Should(Succeed()) + By(fmt.Sprintf("Wait for %d total events generated", test.totalEvents)) Eventually(func() error { - return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.events, tempReason, tempMessage) + return verifyTotalEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.totalEvents) }, pollTimeout, pollInterval).Should(Succeed()) - By(fmt.Sprintf("Make sure only %d events generated", test.events)) + By(fmt.Sprintf("Make sure only %d total events generated", test.totalEvents)) Consistently(func() error { - return verifyEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.events, tempReason, tempMessage) + return verifyTotalEvents(c.CoreV1().Events(eventNamespace), eventListOptions, test.totalEvents) }, pollConsistent, pollInterval).Should(Succeed()) By(fmt.Sprintf("Make sure node condition %q is set", condition)) @@ -390,7 +403,7 @@ func injectLog(file string, timestamp time.Time, log string, num int) error { return nil } -// verifyEvents verifies there are num specific events generated +// verifyEvents verifies there are num specific events generated with given reason and message. 
func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int, reason, message string) error { events, err := e.List(options) if err != nil { @@ -399,7 +412,7 @@ func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, nu count := 0 for _, event := range events.Items { if event.Reason != reason || event.Message != message { - return fmt.Errorf("unexpected event: %v", event) + continue } count += int(event.Count) } @@ -409,14 +422,18 @@ func verifyEvents(e coreclientset.EventInterface, options metav1.ListOptions, nu return nil } -// verifyNoEvents verifies there is no event generated -func verifyNoEvents(e coreclientset.EventInterface, options metav1.ListOptions) error { +// verifyTotalEvents verifies there are num events in total. +func verifyTotalEvents(e coreclientset.EventInterface, options metav1.ListOptions, num int) error { events, err := e.List(options) if err != nil { return err } - if len(events.Items) != 0 { - return fmt.Errorf("unexpected events: %v", events.Items) + count := 0 + for _, event := range events.Items { + count += int(event.Count) + } + if count != num { + return fmt.Errorf("expect event number %d, got %d: %v", num, count, events.Items) } return nil } diff --git a/test/e2e_node/remote/cadvisor_e2e.go b/test/e2e_node/remote/cadvisor_e2e.go index 8bdb567d031a..76ae79aff504 100644 --- a/test/e2e_node/remote/cadvisor_e2e.go +++ b/test/e2e_node/remote/cadvisor_e2e.go @@ -63,7 +63,7 @@ func runCommand(command string, args ...string) error { } // RunTest implements TestSuite.RunTest -func (n *CAdvisorE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error) { +func (n *CAdvisorE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) { // Kill any running node processes cleanupNodeProcesses(host) 
diff --git a/test/e2e_node/remote/node_conformance.go b/test/e2e_node/remote/node_conformance.go index 9c78ae30887c..3a6cf98ae44a 100644 --- a/test/e2e_node/remote/node_conformance.go +++ b/test/e2e_node/remote/node_conformance.go @@ -259,7 +259,7 @@ func stopKubelet(host, workspace string) error { } // RunTest runs test on the node. -func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, _, systemSpecName string, timeout time.Duration) (string, error) { +func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, _, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) { // Install the cni plugins and add a basic CNI configuration. if err := setupCNI(host, workspace); err != nil { return "", err @@ -293,8 +293,8 @@ func (c *ConformanceRemote) RunTest(host, workspace, results, imageDesc, junitFi // Run the tests glog.V(2).Infof("Starting tests on %q", host) podManifestPath := getPodPath(workspace) - cmd := fmt.Sprintf("'timeout -k 30s %fs docker run --rm --privileged=true --net=host -v /:/rootfs -v %s:%s -v %s:/var/result -e TEST_ARGS=--report-prefix=%s %s'", - timeout.Seconds(), podManifestPath, podManifestPath, results, junitFilePrefix, getConformanceTestImageName(systemSpecName)) + cmd := fmt.Sprintf("'timeout -k 30s %fs docker run --rm --privileged=true --net=host -v /:/rootfs -v %s:%s -v %s:/var/result -e TEST_ARGS=--report-prefix=%s -e EXTRA_ENVS=%s %s'", + timeout.Seconds(), podManifestPath, podManifestPath, results, junitFilePrefix, extraEnvs, getConformanceTestImageName(systemSpecName)) testOutput, err := SSH(host, "sh", "-c", cmd) if err != nil { return testOutput, err diff --git a/test/e2e_node/remote/node_e2e.go b/test/e2e_node/remote/node_e2e.go index d54b0d94b023..b3f58267273a 100644 --- a/test/e2e_node/remote/node_e2e.go +++ b/test/e2e_node/remote/node_e2e.go @@ -138,7 +138,7 @@ func updateOSSpecificKubeletFlags(args, host, workspace string) 
(string, error) } // RunTest runs test on the node. -func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error) { +func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) { // Install the cni plugins and add a basic CNI configuration. // TODO(random-liu): Do this in cloud init after we remove containervm test. if err := setupCNI(host, workspace); err != nil { @@ -167,8 +167,8 @@ func (n *NodeE2ERemote) RunTest(host, workspace, results, imageDesc, junitFilePr glog.V(2).Infof("Starting tests on %q", host) cmd := getSSHCommand(" && ", fmt.Sprintf("cd %s", workspace), - fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --system-spec-name=%s --system-spec-file=%s --logtostderr --v 4 --node-name=%s --report-dir=%s --report-prefix=%s --image-description=\"%s\" %s", - timeout.Seconds(), ginkgoArgs, systemSpecName, systemSpecFile, host, results, junitFilePrefix, imageDesc, testArgs), + fmt.Sprintf("timeout -k 30s %fs ./ginkgo %s ./e2e_node.test -- --system-spec-name=%s --system-spec-file=%s --extra-envs=%s --logtostderr --v 4 --node-name=%s --report-dir=%s --report-prefix=%s --image-description=\"%s\" %s", + timeout.Seconds(), ginkgoArgs, systemSpecName, systemSpecFile, extraEnvs, host, results, junitFilePrefix, imageDesc, testArgs), ) return SSH(host, "sh", "-c", cmd) } diff --git a/test/e2e_node/remote/remote.go b/test/e2e_node/remote/remote.go index 746899f8b57b..47501d297748 100644 --- a/test/e2e_node/remote/remote.go +++ b/test/e2e_node/remote/remote.go @@ -65,7 +65,7 @@ func CreateTestArchive(suite TestSuite, systemSpecName string) (string, error) { // Returns the command output, whether the exit was ok, and any errors // TODO(random-liu): junitFilePrefix is not prefix actually, the file name is junit-junitFilePrefix.xml. 
Change the variable name. -func RunRemote(suite TestSuite, archive string, host string, cleanup bool, imageDesc, junitFilePrefix string, testArgs string, ginkgoArgs string, systemSpecName string) (string, bool, error) { +func RunRemote(suite TestSuite, archive string, host string, cleanup bool, imageDesc, junitFilePrefix string, testArgs string, ginkgoArgs string, systemSpecName string, extraEnvs string) (string, bool, error) { // Create the temp staging directory glog.V(2).Infof("Staging test binaries on %q", host) workspace := newWorkspaceDir() @@ -110,7 +110,7 @@ func RunRemote(suite TestSuite, archive string, host string, cleanup bool, image } glog.V(2).Infof("Running test on %q", host) - output, err := suite.RunTest(host, workspace, resultDir, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, *testTimeoutSeconds) + output, err := suite.RunTest(host, workspace, resultDir, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs, *testTimeoutSeconds) aggErrs := []error{} // Do not log the output here, let the caller deal with the test output. diff --git a/test/e2e_node/remote/types.go b/test/e2e_node/remote/types.go index f7e360f7440b..33d36fca5e9a 100644 --- a/test/e2e_node/remote/types.go +++ b/test/e2e_node/remote/types.go @@ -46,6 +46,7 @@ type TestSuite interface { // * ginkgoArgs is the arguments passed to ginkgo. // * systemSpecName is the name of the system spec used for validating the // image on which the test runs. + // * extraEnvs is the extra environment variables needed for node e2e tests. // * timeout is the test timeout. 
- RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName string, timeout time.Duration) (string, error) + RunTest(host, workspace, results, imageDesc, junitFilePrefix, testArgs, ginkgoArgs, systemSpecName, extraEnvs string, timeout time.Duration) (string, error) } diff --git a/test/e2e_node/runner/local/run_local.go b/test/e2e_node/runner/local/run_local.go index c2c169e87bc8..c2b179fde78f 100644 --- a/test/e2e_node/runner/local/run_local.go +++ b/test/e2e_node/runner/local/run_local.go @@ -34,6 +34,7 @@ var buildDependencies = flag.Bool("build-dependencies", true, "If true, build al var ginkgoFlags = flag.String("ginkgo-flags", "", "Space-separated list of arguments to pass to Ginkgo test runner.") var testFlags = flag.String("test-flags", "", "Space-separated list of arguments to pass to node e2e test.") var systemSpecName = flag.String("system-spec-name", "", "The name of the system spec used for validating the image in the node conformance test. The specs are at test/e2e_node/system/specs/. If unspecified, the default built-in spec (system.DefaultSpec) will be used.") +var extraEnvs = flag.String("extra-envs", "", "The extra environment variables needed for node e2e tests. 
Format: a list of key=value pairs, e.g., env1=val1,env2=val2") const ( systemSpecPath = "test/e2e_node/system/specs" @@ -65,7 +66,7 @@ func main() { glog.Fatalf("Failed to get k8s root directory: %v", err) } systemSpecFile := filepath.Join(rootDir, systemSpecPath, *systemSpecName+".yaml") - args = append(args, fmt.Sprintf("--system-spec-name=%s --system-spec-file=%s", *systemSpecName, systemSpecFile)) + args = append(args, fmt.Sprintf("--system-spec-name=%s --system-spec-file=%s --extra-envs=%s", *systemSpecName, systemSpecFile, *extraEnvs)) } if err := runCommand(ginkgo, args...); err != nil { glog.Exitf("Test failed: %v", err) diff --git a/test/e2e_node/runner/remote/run_remote.go b/test/e2e_node/runner/remote/run_remote.go index 7c440ad96b65..998a3c468356 100644 --- a/test/e2e_node/runner/remote/run_remote.go +++ b/test/e2e_node/runner/remote/run_remote.go @@ -62,6 +62,7 @@ var instanceMetadata = flag.String("instance-metadata", "", "key/value metadata var gubernator = flag.Bool("gubernator", false, "If true, output Gubernator link to view logs") var ginkgoFlags = flag.String("ginkgo-flags", "", "Passed to ginkgo to specify additional flags such as --skip=.") var systemSpecName = flag.String("system-spec-name", "", "The name of the system spec used for validating the image in the node conformance test. The specs are at test/e2e_node/system/specs/. If unspecified, the default built-in spec (system.DefaultSpec) will be used.") +var extraEnvs = flag.String("extra-envs", "", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2") // envs is the type used to collect all node envs. 
The key is the env name, // and the value is the env value @@ -440,7 +441,7 @@ func testHost(host string, deleteFiles bool, imageDesc, junitFilePrefix, ginkgoF } } - output, exitOk, err := remote.RunRemote(suite, path, host, deleteFiles, imageDesc, junitFilePrefix, *testArgs, ginkgoFlagsStr, *systemSpecName) + output, exitOk, err := remote.RunRemote(suite, path, host, deleteFiles, imageDesc, junitFilePrefix, *testArgs, ginkgoFlagsStr, *systemSpecName, *extraEnvs) return &TestResult{ output: output, err: err,