Automated cherry pick of #51834 #52361

Merged
49 changes: 24 additions & 25 deletions cluster/log-dump/log-dump.sh
@@ -33,13 +33,14 @@ else
readonly use_custom_instance_list=
fi

readonly master_ssh_supported_providers="gce aws kubemark"
readonly node_ssh_supported_providers="gce gke aws kubemark"
readonly master_ssh_supported_providers="gce aws"
readonly node_ssh_supported_providers="gce gke aws"
readonly gcloud_supported_providers="gce gke"

readonly master_logfiles="kube-apiserver kube-scheduler rescheduler kube-controller-manager etcd etcd-events glbc cluster-autoscaler kube-addon-manager fluentd"
readonly node_logfiles="kube-proxy fluentd node-problem-detector"
readonly node_systemd_services="node-problem-detector"
readonly hollow_node_logfiles="kubelet-hollow-node-* kubeproxy-hollow-node-* npd-*"
readonly hollow_node_logfiles="kubelet-hollow-node-* kubeproxy-hollow-node-* npd-hollow-node-*"
readonly aws_logfiles="cloud-init-output"
readonly gce_logfiles="startupscript"
readonly kern_logfile="kern"
@@ -51,16 +52,15 @@ readonly systemd_services="kubelet docker"
# file descriptors for large clusters.
readonly max_scp_processes=25

# This template spits out the external IPs and images for each node in the cluster in a format like so:
# 52.32.7.85 gcr.io/google_containers/kube-apiserver:1355c18c32d7bef16125120bce194fad gcr.io/google_containers/kube-controller-manager:46365cdd8d28b8207950c3c21d1f3900 [...]
readonly ips_and_images='{range .items[*]}{@.status.addresses[?(@.type == "ExternalIP")].address} {@.status.images[*].names[*]}{"\n"}{end}'

function setup() {
if [[ -z "${use_custom_instance_list}" ]]; then
KUBE_ROOT=$(dirname "${BASH_SOURCE}")/../..
: ${KUBE_CONFIG_FILE:="config-test.sh"}
source "${KUBE_ROOT}/cluster/kube-util.sh"
detect-project &> /dev/null
echo "Detecting project"
detect-project 2>&1
elif [[ "${KUBERNETES_PROVIDER}" == "gke" ]]; then
echo "Using 'use_custom_instance_list' with gke, skipping check for LOG_DUMP_SSH_KEY and LOG_DUMP_SSH_USER"
elif [[ -z "${LOG_DUMP_SSH_KEY:-}" ]]; then
echo "LOG_DUMP_SSH_KEY not set, but required when using log_dump_custom_get_instances"
exit 1
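
For orientation (not part of the diff): the custom-instance-list path that setup() guards here is driven by a caller-defined log_dump_custom_get_instances function plus the LOG_DUMP_SSH_KEY and LOG_DUMP_SSH_USER variables checked above. A minimal, hypothetical driver might look like the sketch below; the role argument, the one-address-per-line output, and the report-directory argument are assumptions, not shown in this diff.

    # Hypothetical driver for the custom-instance-list path (sketch only; the
    # role argument and one-address-per-line contract are assumptions, as is
    # passing the report directory as the first argument).
    export LOG_DUMP_SSH_KEY="${HOME}/.ssh/id_rsa"
    export LOG_DUMP_SSH_USER="admin"
    log_dump_custom_get_instances() {
      local -r role="$1"   # assumed: "master" or "node"
      if [[ "${role}" == "master" ]]; then
        echo "203.0.113.10"
      else
        echo "203.0.113.11"
        echo "203.0.113.12"
      fi
    }
    export -f log_dump_custom_get_instances
    cluster/log-dump/log-dump.sh /tmp/cluster-logs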
@@ -98,20 +98,17 @@ function copy-logs-from-node() {
# Comma delimit (even the singleton, or scp does the wrong thing), surround by braces.
local -r scp_files="{$(printf "%s," "${files[@]}")}"

if [[ -n "${use_custom_instance_list}" ]]; then
if [[ "${gcloud_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
# get-serial-port-output lets you ask for ports 1-4, but currently (11/21/2016) only port 1 contains useful information
gcloud compute instances get-serial-port-output --project "${PROJECT}" --zone "${ZONE}" --port 1 "${node}" > "${dir}/serial-1.log" || true
gcloud compute scp --recurse --project "${PROJECT}" --zone "${ZONE}" "${node}:${scp_files}" "${dir}" > /dev/null || true
elif [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
local ip=$(get_ssh_hostname "${node}")
scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}:${scp_files}" "${dir}" > /dev/null || true
elif [[ -n "${use_custom_instance_list}" ]]; then
scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${LOG_DUMP_SSH_KEY}" "${LOG_DUMP_SSH_USER}@${node}:${scp_files}" "${dir}" > /dev/null || true
else
case "${KUBERNETES_PROVIDER}" in
gce|gke|kubemark)
# get-serial-port-output lets you ask for ports 1-4, but currently (11/21/2016) only port 1 contains useful information
gcloud compute instances get-serial-port-output --project "${PROJECT}" --zone "${ZONE}" --port 1 "${node}" > "${dir}/serial-1.log" || true
gcloud compute scp --recurse --project "${PROJECT}" --zone "${ZONE}" "${node}:${scp_files}" "${dir}" > /dev/null || true
;;
aws)
local ip=$(get_ssh_hostname "${node}")
scp -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}:${scp_files}" "${dir}" > /dev/null || true
;;
esac
echo "Unknown cloud-provider '${KUBERNETES_PROVIDER}' and use_custom_instance_list is unset too - skipping logdump for '${node}'"
fi
}
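
As an aside on the scp_files construction commented in copy-logs-from-node above: printf appends a comma after every array element and the result is wrapped in braces, so even a single file stays a brace expression the remote shell will expand. A quick sketch of what it produces (file names illustrative):

    # Sketch of the scp_files construction commented above (file names illustrative).
    files=("kube-proxy.log" "fluentd.log")
    scp_files="{$(printf "%s," "${files[@]}")}"
    echo "${scp_files}"    # -> {kube-proxy.log,fluentd.log,}

    # With a single file, a bare {kube-proxy.log} would not brace-expand on the
    # remote host, so scp would look for a literal path named "{kube-proxy.log}".
    # The unconditional trailing comma keeps the expression a real brace list.
    files=("kube-proxy.log")
    scp_files="{$(printf "%s," "${files[@]}")}"
    echo "${scp_files}"    # -> {kube-proxy.log,}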

@@ -130,11 +127,8 @@ function save-logs() {
fi
else
case "${KUBERNETES_PROVIDER}" in
gce|gke|kubemark)
gce|gke)
files="${files} ${gce_logfiles}"
if [[ "${KUBERNETES_PROVIDER}" == "kubemark" && "${ENABLE_HOLLOW_NODE_LOGS:-}" == "true" ]]; then
files="${files} ${hollow_node_logfiles}"
fi
;;
aws)
files="${files} ${aws_logfiles}"
@@ -220,13 +214,18 @@ function dump_nodes() {
return
fi

node_logfiles_all="${node_logfiles}"
if [[ "${ENABLE_HOLLOW_NODE_LOGS:-}" == "true" ]]; then
node_logfiles_all="${node_logfiles_all} ${hollow_node_logfiles}"
fi

proc=${max_scp_processes}
for node_name in "${node_names[@]}"; do
node_dir="${report_dir}/${node_name}"
mkdir -p "${node_dir}"
# Save logs in the background. This speeds up things when there are
# many nodes.
save-logs "${node_name}" "${node_dir}" "${node_logfiles}" "${node_systemd_services}" &
save-logs "${node_name}" "${node_dir}" "${node_logfiles_all}" "${node_systemd_services}" &

# We don't want to run more than ${max_scp_processes} at a time, so
# wait once we hit that many nodes. This isn't ideal, since one might
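
The loop is cut off here, but the comment describes a common batching idiom: decrement a counter for every backgrounded save-logs call and wait for the whole batch once max_scp_processes jobs are in flight. A minimal sketch of that pattern follows (illustrative stand-ins, not the verbatim remainder of the loop), along with how the new hollow-node behavior introduced in this hunk would be switched on.

    # Sketch of the batching idiom the truncated comment above describes
    # (illustrative stand-ins, not the verbatim remainder of the loop).
    max_jobs=25
    nodes=(node-1 node-2 node-3)
    do_work() { sleep 1; }        # stand-in for the backgrounded save-logs call

    proc=${max_jobs}
    for node in "${nodes[@]}"; do
      do_work "${node}" &
      proc=$((proc - 1))
      if [[ ${proc} -eq 0 ]]; then
        proc=${max_jobs}
        wait                      # block until the current batch finishes
      fi
    done
    wait                          # collect whatever is left in a partial batch

    # With this change, hollow-node log files are only collected when the caller
    # opts in (the report-directory argument here is an assumption):
    ENABLE_HOLLOW_NODE_LOGS=true cluster/log-dump/log-dump.sh /tmp/kubemark-logs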