Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Automated cherry pick of #73288 to release-1.12: Decouple node-problem-detector release from kubernetes #75519

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions cluster/gce/config-default.sh
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,8 @@ else
fi
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}"
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"

# Optional: Create autoscaler for cluster's nodes.
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
Expand Down
2 changes: 2 additions & 0 deletions cluster/gce/config-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,8 @@ else
fi
NODE_PROBLEM_DETECTOR_VERSION="${NODE_PROBLEM_DETECTOR_VERSION:-}"
NODE_PROBLEM_DETECTOR_TAR_HASH="${NODE_PROBLEM_DETECTOR_TAR_HASH:-}"
NODE_PROBLEM_DETECTOR_RELEASE_PATH="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-}"
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"

# Optional: Create autoscaler for cluster's nodes.
ENABLE_CLUSTER_AUTOSCALER="${KUBE_ENABLE_CLUSTER_AUTOSCALER:-false}"
Expand Down
30 changes: 17 additions & 13 deletions cluster/gce/gci/configure-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1248,21 +1248,25 @@ EOF
function start-node-problem-detector {
echo "Start node problem detector"
local -r npd_bin="${KUBE_HOME}/bin/node-problem-detector"
local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
# TODO(random-liu): Handle this for alternative container runtime.
local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json"
local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json"
echo "Using node problem detector binary at ${npd_bin}"
local flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
flags+=" --logtostderr"
flags+=" --system-log-monitors=${km_config},${dm_config}"
flags+=" --custom-plugin-monitors=${custom_km_config}"
flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig"
local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256}
flags+=" --port=${npd_port}"
if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then
flags+=" ${EXTRA_NPD_ARGS}"

local flags="${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-}"
if [[ -z "${flags}" ]]; then
local -r km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor.json"
# TODO(random-liu): Handle this for alternative container runtime.
local -r dm_config="${KUBE_HOME}/node-problem-detector/config/docker-monitor.json"
local -r custom_km_config="${KUBE_HOME}/node-problem-detector/config/kernel-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/systemd-monitor-counter.json,${KUBE_HOME}/node-problem-detector/config/docker-monitor-counter.json"
flags="${NPD_TEST_LOG_LEVEL:-"--v=2"} ${NPD_TEST_ARGS:-}"
flags+=" --logtostderr"
flags+=" --system-log-monitors=${km_config},${dm_config}"
flags+=" --custom-plugin-monitors=${custom_km_config}"
local -r npd_port=${NODE_PROBLEM_DETECTOR_PORT:-20256}
flags+=" --port=${npd_port}"
if [[ -n "${EXTRA_NPD_ARGS:-}" ]]; then
flags+=" ${EXTRA_NPD_ARGS}"
fi
fi
flags+=" --apiserver-override=https://${KUBERNETES_MASTER_NAME}?inClusterConfig=false&auth=/var/lib/node-problem-detector/kubeconfig"

# Write the systemd service file for node problem detector.
cat <<EOF >/etc/systemd/system/node-problem-detector.service
Expand Down
6 changes: 3 additions & 3 deletions cluster/gce/gci/configure.sh
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,12 @@ function install-node-problem-detector {
local -r npd_tar="node-problem-detector-${npd_version}.tar.gz"

if is-preloaded "${npd_tar}" "${npd_sha1}"; then
echo "node-problem-detector is preloaded."
echo "${npd_tar} is preloaded."
return
fi

echo "Downloading node problem detector."
local -r npd_release_path="https://storage.googleapis.com/kubernetes-release"
echo "Downloading ${npd_tar}."
local -r npd_release_path="${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-https://storage.googleapis.com/kubernetes-release}"
download-or-bust "${npd_sha1}" "${npd_release_path}/node-problem-detector/${npd_tar}"
local -r npd_dir="${KUBE_HOME}/node-problem-detector"
mkdir -p "${npd_dir}"
Expand Down
2 changes: 2 additions & 0 deletions cluster/gce/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,8 @@ ENABLE_CLUSTER_UI: $(yaml-quote ${ENABLE_CLUSTER_UI:-false})
ENABLE_NODE_PROBLEM_DETECTOR: $(yaml-quote ${ENABLE_NODE_PROBLEM_DETECTOR:-none})
NODE_PROBLEM_DETECTOR_VERSION: $(yaml-quote ${NODE_PROBLEM_DETECTOR_VERSION:-})
NODE_PROBLEM_DETECTOR_TAR_HASH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_TAR_HASH:-})
NODE_PROBLEM_DETECTOR_RELEASE_PATH: $(yaml-quote ${NODE_PROBLEM_DETECTOR_RELEASE_PATH:-})
NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS: $(yaml-quote ${NODE_PROBLEM_DETECTOR_CUSTOM_FLAGS:-})
ENABLE_NODE_LOGGING: $(yaml-quote ${ENABLE_NODE_LOGGING:-false})
LOGGING_DESTINATION: $(yaml-quote ${LOGGING_DESTINATION:-})
ELASTICSEARCH_LOGGING_REPLICAS: $(yaml-quote ${ELASTICSEARCH_LOGGING_REPLICAS:-})
Expand Down
7 changes: 4 additions & 3 deletions hack/make-rules/test-e2e-node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ image_service_endpoint=${IMAGE_SERVICE_ENDPOINT:-""}
run_until_failure=${RUN_UNTIL_FAILURE:-"false"}
test_args=${TEST_ARGS:-""}
system_spec_name=${SYSTEM_SPEC_NAME:-}
extra_envs=${EXTRA_ENVS:-}

# Parse the flags to pass to ginkgo
ginkgoflags=""
Expand Down Expand Up @@ -148,7 +149,7 @@ if [ $remote = true ] ; then
--image-project="$image_project" --instance-name-prefix="$instance_prefix" \
--delete-instances="$delete_instances" --test_args="$test_args" --instance-metadata="$metadata" \
--image-config-file="$image_config_file" --system-spec-name="$system_spec_name" \
--test-suite="$test_suite" \
--extra-envs="$extra_envs" --test-suite="$test_suite" \
2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?

Expand All @@ -169,8 +170,8 @@ else
# Test using the host the script was run on
# Provided for backwards compatibility
go run test/e2e_node/runner/local/run_local.go \
--system-spec-name="$system_spec_name" --ginkgo-flags="$ginkgoflags" \
--test-flags="--container-runtime=${runtime} \
--system-spec-name="$system_spec_name" --extra-envs="$extra_envs" \
--ginkgo-flags="$ginkgoflags" --test-flags="--container-runtime=${runtime} \
--alsologtostderr --v 4 --report-dir=${artifacts} --node-name $(hostname) \
$test_args" --build-dependencies=true 2>&1 | tee -i "${artifacts}/build-log.txt"
exit $?
Expand Down
6 changes: 3 additions & 3 deletions test/e2e/framework/kubelet_stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,8 @@ func HighLatencyKubeletOperations(c clientset.Interface, threshold time.Duration
return badMetrics, nil
}

// getStatsSummary contacts kubelet for the container information.
func getStatsSummary(c clientset.Interface, nodeName string) (*stats.Summary, error) {
// GetStatsSummary contacts kubelet for the container information.
func GetStatsSummary(c clientset.Interface, nodeName string) (*stats.Summary, error) {
ctx, cancel := context.WithTimeout(context.Background(), SingleCallTimeout)
defer cancel()

Expand Down Expand Up @@ -348,7 +348,7 @@ func getOneTimeResourceUsageOnNode(
return nil, fmt.Errorf("numStats needs to be > 1 and < %d", maxNumStatsToRequest)
}
// Get information of all containers on the node.
summary, err := getStatsSummary(c, nodeName)
summary, err := GetStatsSummary(c, nodeName)
if err != nil {
return nil, err
}
Expand Down
3 changes: 3 additions & 0 deletions test/e2e/framework/test_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ type NodeTestContextType struct {
// the node e2e test. If empty, the default one (system.DefaultSpec) is
// used. The system specs are in test/e2e_node/system/specs/.
SystemSpecName string
// ExtraEnvs is a map of environment variable names to values.
ExtraEnvs map[string]string
}

// StorageConfig contains the shared settings for storage e2e tests.
Expand Down Expand Up @@ -304,6 +306,7 @@ func RegisterNodeFlags() {
flag.BoolVar(&TestContext.PrepullImages, "prepull-images", true, "If true, prepull images so image pull failures do not cause test failures.")
flag.StringVar(&TestContext.ImageDescription, "image-description", "", "The description of the image which the test will be running on.")
flag.StringVar(&TestContext.SystemSpecName, "system-spec-name", "", "The name of the system spec (e.g., gke) that's used in the node e2e test. The system specs are in test/e2e_node/system/specs/. This is used by the test framework to determine which tests to run for validating the system requirements.")
flag.Var(utilflag.NewMapStringString(&TestContext.ExtraEnvs), "extra-envs", "The extra environment variables needed for node e2e tests. Format: a list of key=value pairs, e.g., env1=val1,env2=val2")
}

func RegisterStorageFlags() {
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/node/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ go_library(
"kubelet.go",
"kubelet_perf.go",
"mount_propagation.go",
"node_problem_detector.go",
"pod_gc.go",
"pods.go",
"pre_stop.go",
Expand All @@ -19,6 +20,7 @@ go_library(
importpath = "k8s.io/kubernetes/test/e2e/node",
visibility = ["//visibility:public"],
deps = [
"//pkg/api/v1/node:go_default_library",
"//pkg/kubelet/apis/stats/v1alpha1:go_default_library",
"//staging/src/k8s.io/api/core/v1:go_default_library",
"//staging/src/k8s.io/apimachinery/pkg/api/resource:go_default_library",
Expand Down