Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AWS kube-up: collect logs from e2e #23027

Merged
merged 1 commit into from
Mar 17, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
37 changes: 29 additions & 8 deletions cluster/aws/util.sh
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,13 @@ function query-running-minions () {
--query ${query}
}

function find-running-minions () {
function detect-node-names () {
# If this is called directly, VPC_ID might not be set
# (this is case from cluster/log-dump.sh)
if [[ -z "${VPC_ID:-}" ]]; then
VPC_ID=$(get_vpc_id)
fi

NODE_IDS=()
NODE_NAMES=()
for id in $(query-running-minions "Reservations[].Instances[].InstanceId"); do
Expand All @@ -251,8 +257,14 @@ function find-running-minions () {
done
}

# Provider hook: on GCE this discovers the active project.
# AWS has no equivalent concept, so this stub exists only so that
# provider-agnostic callers can invoke it unconditionally.
function detect-project() {
  true
}

function detect-nodes () {
find-running-minions
detect-node-names

# This is inefficient, but we want NODE_NAMES / NODE_IDS to be ordered the same as KUBE_NODE_IP_ADDRESSES
KUBE_NODE_IP_ADDRESSES=()
Expand Down Expand Up @@ -1225,7 +1237,7 @@ function wait-minions {
max_attempts=90
fi
while true; do
find-running-minions > $LOG
detect-node-names > $LOG
if [[ ${#NODE_IDS[@]} == ${NUM_NODES} ]]; then
echo -e " ${color_green}${#NODE_IDS[@]} minions started; ready${color_norm}"
break
Expand Down Expand Up @@ -1552,24 +1564,33 @@ function test-teardown {
}


# Gets the hostname (or IP) that we should SSH to for the given nodename.
# For the master, we use the nodename; for the nodes we use their instanceids.
#
# Arguments: $1 - node name (MASTER_NAME or an instance id)
# Globals read: MASTER_NAME
# Outputs: the node's public IP on stdout
# Exits non-zero with a message on stderr if the master instance or the
# IP cannot be resolved.
function get_ssh_hostname {
  local node="$1"

  if [[ "${node}" == "${MASTER_NAME}" ]]; then
    # The master is addressed by name; translate it to its instance id first.
    node=$(get_instanceid_from_name "${MASTER_NAME}")
    if [[ -z "${node-}" ]]; then
      echo "Could not detect Kubernetes master node. Make sure you've launched a cluster with 'kube-up.sh'" 1>&2
      exit 1
    fi
  fi

  # Declare and assign separately so a helper failure isn't masked by 'local'.
  local ip
  ip=$(get_instance_public_ip "${node}")
  if [[ -z "${ip}" ]]; then
    echo "Could not detect IP for ${node}." 1>&2
    exit 1
  fi
  echo "${ip}"
}

# SSH to a node by name ($1) and run a command ($2).
function ssh-to-node {
local node="$1"
local cmd="$2"

local ip=$(get_ssh_hostname ${node})

for try in $(seq 1 5); do
if ssh -oLogLevel=quiet -oConnectTimeout=30 -oStrictHostKeyChecking=no -i "${AWS_SSH_KEY}" ${SSH_USER}@${ip} "echo test > /dev/null"; then
Expand Down
14 changes: 10 additions & 4 deletions cluster/log-dump.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ function copy-logs-from-node() {
local -r scp_files="{$(echo ${files[*]} | tr ' ' ',')}"

if [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
local ip=$(get_instance_public_ip "${node}")
local ip=$(get_ssh_hostname "${node}")
scp -i "${AWS_SSH_KEY}" "${SSH_USER}@${ip}:${scp_files}" "${dir}" > /dev/null || true
else
gcloud compute copy-files --project "${PROJECT}" --zone "${ZONE}" "${node}:${scp_files}" "${dir}" > /dev/null || true
Expand All @@ -62,10 +62,14 @@ function save-logs() {
if [[ "${KUBERNETES_PROVIDER}" == "gce" ]]; then
files="${files} ${gce_logfiles}"
fi
if [[ "${KUBERNETES_PROVIDER}" == "aws" ]]; then
files="${files} ${aws_logfiles}"
fi
if ssh-to-node "${node_name}" "sudo systemctl status kubelet.service" &> /dev/null; then
ssh-to-node "${node_name}" "sudo journalctl --output=cat -u kubelet.service" > "${dir}/kubelet.log" || true
ssh-to-node "${node_name}" "sudo journalctl --output=cat -u docker.service" > "${dir}/docker.log" || true
else
files="${files} ${supervisord_logfiles}"
files="${files} ${initd_logfiles} ${supervisord_logfiles}"
fi
copy-logs-from-node "${node_name}" "${dir}" "${files}"
}
Expand All @@ -75,8 +79,10 @@ readonly node_ssh_supported_providers="gce gke aws"

# Log file name lists collected from cluster machines, grouped by role and
# init system.  The stale pre-change duplicate of common_logfiles ("kern
# docker") is dropped: re-declaring a readonly variable is a runtime error
# in bash, and docker now lives in initd_logfiles.
readonly master_logfiles="kube-apiserver kube-scheduler kube-controller-manager etcd"
readonly node_logfiles="kube-proxy"
readonly aws_logfiles="cloud-init-output"
readonly gce_logfiles="startupscript"
readonly common_logfiles="kern"
readonly initd_logfiles="docker"
readonly supervisord_logfiles="kubelet supervisor/supervisord supervisor/kubelet-stdout supervisor/kubelet-stderr"

# Limit the number of concurrent node connections so that we don't run out of
Expand All @@ -85,7 +91,7 @@ readonly max_scp_processes=25

if [[ ! "${master_ssh_supported_providers}" =~ "${KUBERNETES_PROVIDER}" ]]; then
echo "Master SSH not supported for ${KUBERNETES_PROVIDER}"
elif ! $(detect-master &> /dev/null); then
elif ! (detect-master &> /dev/null); then
echo "Master not detected. Is the cluster up?"
else
readonly master_dir="${report_dir}/${MASTER_NAME}"
Expand Down