Skip to content

Commit

Permalink
Merge pull request #75518 from wangzhen127/automated-cherry-pick-of-#…
Browse files Browse the repository at this point in the history
…73288-upstream-release-1.11

Automated cherry pick of #73288 to release-1.11: Decouple node-problem-detector release from kubernetes
  • Loading branch information
k8s-ci-robot committed Apr 1, 2019
2 parents f099a9d + ffa6f47 commit 9575832
Show file tree
Hide file tree
Showing 23 changed files with 416 additions and 57 deletions.
2 changes: 2 additions & 0 deletions cluster/gce/config-default.sh
Expand Up @@ -284,6 +284,8 @@ else
fi
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}"
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"

# Optional: Create autoscaler for cluster's nodes.
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
Expand Down
2 changes: 2 additions & 0 deletions cluster/gce/config-test.sh
Expand Up @@ -291,6 +291,8 @@ else
fi
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}"
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"

# Optional: Create autoscaler for cluster's nodes.
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
Expand Down
30 changes: 17 additions & 13 deletions cluster/gce/gci/configure-helper.sh
Expand Up @@ -1197,21 +1197,25 @@ EOF
function start-node-problem-detector {
echo "Start node problem detector"
local -r npd_bin="${KUBE_HOME}/bin/node-problem-detector"
local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
# TODO(random-liu): Handle this for alternative container runtime.
local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json"
local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json"
echo "Using node problem detector binary at ${npd_bin}"
local flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
flags+=" --logtostderr"
flags+=" --system-log-monitors=${km_config},${dm_config}"
flags+=" --custom-plugin-monitors=${custom_km_config}"
flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig"
local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256}
flags+=" --port=${npd_port}"
if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then
flags+=" ${EXTRA_NPD_ARGS}"

local flags="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
if [[ -z "${flags}" ]]; then
local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
# TODO(random-liu): Handle this for alternative container runtime.
local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json"
local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json"
flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
flags+=" --logtostderr"
flags+=" --system-log-monitors=${km_config},${dm_config}"
flags+=" --custom-plugin-monitors=${custom_km_config}"
local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256}
flags+=" --port=${npd_port}"
if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then
flags+=" ${EXTRA_NPD_ARGS}"
fi
fi
flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig"

# Write the systemd service file for node problem detector.
cat <<EOF >/etc/systemd/system/node-problem-detector.service
Expand Down
6 changes: 3 additions & 3 deletions cluster/gce/gci/configure.sh
Expand Up @@ -202,12 +202,12 @@ function install-node-problem-detector {
local -r npd_tar="node-problem-detector-${npd_version}.tar.gz"

if is-preloaded "${npd_tar}" "${npd_sha1}"; then
echo "node-problem-detector is preloaded."
echo "${npd_tar} is preloaded."
return
fi

echo "Downloading node problem detector."
local -r npd_release_path="https://storage.googleapis.com/kubernetes-release"
echo "Downloading ${npd_tar}."
local -r npd_release_path="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-https://storage.googleapis.com/kubernetes-release}"
download-or-bust "${npd_sha1}" "${npd_release_path}/node-problem-detector/${npd_tar}"
local -r npd_dir="${KUBE_HOME}/node-problem-detector"
mkdir -p "${npd_dir}"
Expand Down
2 changes: 2 additions & 0 deletions cluster/gce/util.sh
Expand Up @@ -829,6 +829,8 @@ ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false})
ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-none})
NODE_PROBLEM_DETECTOR_VERSION: $(yaml-quote ${NODE_PROBLEM_DETECTOR_VERSION:-})
NODE_PROBLEM_DETECTOR_TAR_HASH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_TAR_HASH:-})
NODE_PROBLEM_DETECTOR_RELEASE_PATH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-})
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS: $(yaml-quote ${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-})
ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
ENABLE_RESCHEDULER: $(yaml-quote ${ENABLE_RESCHEDULER:-false})
LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
Expand Down
7 changes: 4 additions & 3 deletions hack/make-rules/test-e2e-node.sh
Expand Up @@ -34,6 +34,7 @@ image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""}
run_until_failure=${RUN_UNTIL_FAILURE:-"false"}
test_args=${TEST_ARGS:-""}
system_spec_name=${SYSTEM_SPEC_NAME:-}
extra_envs=${EXTRA_ENVS:-}

# Parse the flags to pass to ginkgo
ginkgoflags=""
Expand Down Expand Up @@ -148,7 +149,7 @@ if [ $remote = true ] ; then
--image-project="$image_project" --instance-name-prefix="$instance_prefix" \
--delete-instances="$delete_instances" --test_args="$test_args" --instance-metadata="$metadata" \
--image-config-file="$image_config_file" --system-spec-name="$system_spec_name" \
--test-suite="$test_suite" \
--extra-envs="$extra_envs" --test-suite="$test_suite" \
2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?

Expand All @@ -169,8 +170,8 @@ else
# Test using the host the script was run on
# Provided for backwards compatibility
go run test/e2e_node/runner/local/run_local.go \
--system-spec-name="$system_spec_name" --ginkgo-flags="$ginkgoflags" \
--test-flags="--container-runtime=${runtime} \
--system-spec-name="$system_spec_name" --extra-envs="$extra_envs" \
--ginkgo-flags="$ginkgoflags" --test-flags="--container-runtime=${runtime} \
--alsologtostderr --v 4 --report-dir=${artifacts} --node-name $(hostname) \
$test_args" --build-dependencies=true 2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?
Expand Down
6 changes: 3 additions & 3 deletions test/e2e/framework/kubelet_stats.go
Expand Up @@ -281,8 +281,8 @@ func HighLatencyKubeletOperations(c clientset.Interface, threshold time.Duration
return badMetrics, nil
}

// getStatsSummary contacts kubelet for the container information.
func getStatsSummary(c clientset.Interface, nodeName string) (*stats.Summary, error) {
// GetStatsSummary contacts kubelet for the container information.
func GetStatsSummary(c clientset.Interface, nodeName string) (*stats.Summary, error) {
ctx, cancel := context.WithTimeout(context.Background(), SingleCallTimeout)
defer cancel()

Expand Down Expand Up @@ -348,7 +348,7 @@ func getOneTimeResourceUsageOnNode(
return nil, fmt.Errorf("numStats needs to be > 1 and < %d", maxNumStatsToRequest)
}
// Get information of all containers on the node.
summary, err := getStatsSummary(c, nodeName)
summary, err := GetStatsSummary(c, nodeName)
if err != nil {
return nil, err
}
Expand Down
3 changes: 3 additions & 0 deletions test/e2e/framework/test_context.go
Expand Up @@ -160,6 +160,8 @@ type NodeTestContextType struct {
// the node e2e test. If empty, the default one (system.DefaultSpec) is
// used. The system specs are in test/e2e_node/system/specs/.
SystemSpecName string
// ExtraEnvs is a map of environment variable names to values.
ExtraEnvs map[string]string
}

// StorageConfig contains the shared settings for storage e2e tests.
Expand Down Expand Up @@ -301,6 +303,7 @@ func RegisterNodeFlags() {
flag.BoolVar(&TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
flag.StringVar(&TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
flag.StringVar(&TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
flag.Var(utilflag.NewMapStringString(&TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
}

func RegisterStorageFlags() {
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/node/BUILD
Expand Up @@ -9,6 +9,7 @@ go_library(
"kubelet.go",
"kubelet_perf.go",
"mount_propagation.go",
"node_problem_detector.go",
"pod_gc.go",
"pods.go",
"pre_stop.go",
Expand All @@ -18,6 +19,7 @@ go_library(
importpath = "k8s.io/kubernetes/test/e2e/node",
visibility = ["//visibility:public"],
deps = [
"//pkg/api/v1/node:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//test/e2e/common:go_default_library",
"//test/e2e/framework:go_default_library",
Expand Down

0 comments on commit 9575832

Please sign in to comment.