Merge pull request #65018 from shyamjvs/add-debug-logs-to-logexporter

Automatic merge from submit-queue (batch tested with PRs 64974, 65009, 65018). If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>. Increase logexporter timeout and add debug logs. Ref - #63030 (comment). It seems that logexporter isn't running on a large number of nodes in our 5k-node cluster (~40% of nodes). As a result, we fall back to ssh-based copying for all of those nodes, which is slow, and hence the job times out. My feeling is that this is caused by slow scheduling of the logexporter pods (and hence quite a few nodes didn't even get the chance to run those pods before we deleted the daemonset). /cc @wojtek-t @krzyzacy ```release-note NONE ``` /sig scalability /kind bug /priority important-soon /milestone v1.11 /status approved-for-milestone
kubernetes · Jun 12, 2018 · 55c64a5 · 55c64a5
2 parents e7bdebd + 87225c0
commit 55c64a5
Showing 1 changed file with 2 additions and 1 deletion.
diff --git a/cluster/log-dump/log-dump.sh b/cluster/log-dump/log-dump.sh
@@ -294,7 +294,7 @@ function dump_nodes_with_logexporter() {
   local -r service_account_credentials="$(cat ${GOOGLE_APPLICATION_CREDENTIALS} | base64 | tr -d '\n')"
   local -r cloud_provider="${KUBERNETES_PROVIDER}"
   local -r enable_hollow_node_logs="${ENABLE_HOLLOW_NODE_LOGS:-false}"
-  local -r logexport_sleep_seconds="$(( 90 + NUM_NODES / 5 ))"
+  local -r logexport_sleep_seconds="$(( 90 + NUM_NODES / 3 ))"
 
   # Fill in the parameters in the logexporter daemonset template.
   sed -i'' -e "s@{{.LogexporterNamespace}}@${logexporter_namespace}@g" "${KUBE_ROOT}/cluster/log-dump/logexporter-daemonset.yaml"
@@ -345,6 +345,7 @@ function dump_nodes_with_logexporter() {
   done
 
   # Delete the logexporter resources and dump logs for the failed nodes (if any) through SSH.
+  "${KUBECTL}" get pods --namespace "${logexporter_namespace}" || true
   "${KUBECTL}" delete namespace "${logexporter_namespace}" || true
   if [[ "${#failed_nodes[@]}" != 0 ]]; then
     echo -e "Dumping logs through SSH for the following nodes:\n${failed_nodes[@]}"