Skip to content

Commit

Permalink
Merge pull request #183 from astoycos/release-4.5
Browse files Browse the repository at this point in the history
Bug 1887446: [release-4.5]Backport `gather_core_dumps`
  • Loading branch information
openshift-merge-robot committed Oct 22, 2020
2 parents 4e97dec + 4fa7ff9 commit f6a03f4
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 3 deletions.
51 changes: 51 additions & 0 deletions collection-scripts/gather_core_dumps
@@ -0,0 +1,51 @@
#!/bin/bash
# Collects core dumps from cluster nodes into a local directory.

# Root directory for collected data, relative to the working directory.
BASE_COLLECTION_PATH="must-gather"
# Destination for the per-node core dumps; a non-empty OUT overrides the default.
CORE_DUMP_PATH="${OUT:-${BASE_COLLECTION_PATH}/node_core_dumps}"

# Create the destination (and any missing parents) before anything is copied.
mkdir -p "${CORE_DUMP_PATH}/"

#######################################
# Copy the core dumps found under /host/var/lib/systemd/coredump of a node's
# debug pod into "${CORE_DUMP_PATH}/<node>_core_dump".
# The debug pod is created in the "default" namespace and deleted afterwards.
# Globals:   CORE_DUMP_PATH (read)
# Arguments: $1 - name of the node
# Outputs:   progress and failure messages on stdout
# Returns:   1 when the debug pod name could not be obtained or the pod did
#            not become Ready; otherwise the status of the final pod deletion
#######################################
function get_dump_off_node {
  local node="$1"
  local debugPod=""

  # Get debug pod's name; treat a failing oc call like an empty answer.
  debugPod=$(oc debug --to-namespace="default" node/"${node}" -o jsonpath='{.metadata.name}') || debugPod=""

  # Without a name there is nothing to wait for, copy from or delete, so stop
  # before a pod is started that this function could never clean up.
  if [[ -z "${debugPod}" ]]; then
    echo "Debug pod for node ${node} never activated"
    return 1
  fi

  # Start the debug pod in the "default" namespace; the sleep keeps it up
  # until it is deleted below.
  oc debug --to-namespace="default" node/"${node}" -- /bin/bash -c 'sleep 300' > /dev/null 2>&1 &

  # Mimic a normal oc call, i.e. pause between two successive calls to allow
  # the pod to register.
  sleep 2
  if ! oc wait -n "default" --for=condition=Ready pod/"${debugPod}" --timeout=30s; then
    echo "Debug pod for node ${node} never activated"
    # Still remove whatever was created so no debug pod is left behind.
    oc delete pod "${debugPod}" -n "default"
    return 1
  fi

  # Copy core dumps out of the node, suppressing oc's output. The copy runs
  # in the foreground, so there is no background PID to record here.
  echo "Copying core dumps on node ${node}"
  oc cp --loglevel 1 -n "default" "${debugPod}":/host/var/lib/systemd/coredump "${CORE_DUMP_PATH}/${node}_core_dump" > /dev/null 2>&1

  # Clean up the debug pod after we are done using it.
  oc delete pod "${debugPod}" -n "default"
}

#######################################
# Run get_dump_off_node for every node in parallel.
# Globals:   NODES (read)  - whitespace-separated list of node names
#            PIDS  (write) - gains the PID of each background job started
# Arguments: none
#######################################
function gather_core_dump_data {
  local node
  # NODES is a whitespace-separated string, so it is left unquoted on purpose
  # to be split into one word per node.
  for node in ${NODES}; do
    get_dump_off_node "${node}" &
    # Record the job in this shell: anything the job itself appends to PIDS
    # happens in its own subshell and is not visible to the caller.
    PIDS+=($!)
  done
}

# Entry point: collect core dumps from the nodes named on the command line,
# or from every linux node when no node is given.
if [ $# -eq 0 ]; then
  echo "WARNING: Collecting core dumps on ALL linux nodes in your cluster. This could take a long time."
fi

# PIDs of background collection jobs to wait for.
PIDS=()
# Whitespace-separated node list: the script's arguments, or, when there are
# none, the names of all nodes whose reported operating system is linux.
# The [?(...)] filter does the OS selection; a bare "==linux" inside a field
# path is not a filter and would not exclude other nodes.
NODES="${*:-$(oc get nodes -o jsonpath='{.items[?(@.status.nodeInfo.operatingSystem=="linux")].metadata.name}')}"

gather_core_dump_data

echo "INFO: Waiting for node core dump collection to complete ..."
# With no recorded PIDs this expands to a bare `wait`, which waits for every
# background job of this shell.
wait "${PIDS[@]}"
echo "INFO: Node core dump collection complete."
15 changes: 12 additions & 3 deletions collection-scripts/gather_network_logs
Expand Up @@ -55,7 +55,14 @@ function gather_ovn_kubernetes_nodes_data {
PIDS+=($!)
oc -n openshift-ovn-kubernetes exec ${OVS_NODE_POD} -- bash -c \
"ovs-ofctl dump-flows br-local" > ${NETWORK_LOG_PATH}/${NODE}_${OVS_NODE_POD}_ovs_ofctl_dump_flows_br_local &
PIDS+=($!)
oc -n openshift-ovn-kubernetes exec ${OVS_NODE_POD} -- bash -c \
"ovs-ofctl dump-ports-desc br-ex" > ${NETWORK_LOG_PATH}/${NODE}_${OVS_NODE_POD}_ovs_ofctl_dump_ports_br_ex &
PIDS+=($!)
oc -n openshift-ovn-kubernetes exec ${OVS_NODE_POD} -- bash -c \
"ovs-ofctl dump-flows br-ex" > ${NETWORK_LOG_PATH}/${NODE}_${OVS_NODE_POD}_ovs_ofctl_dump_flows_br_ex &
PIDS+=($!)

oc -n openshift-ovn-kubernetes exec ${OVS_NODE_POD} -- bash -c \
"ovs-vsctl show" > ${NETWORK_LOG_PATH}/${NODE}_${OVS_NODE_POD}_ovs_dump &
PIDS+=($!)
Expand All @@ -65,11 +72,13 @@ function gather_ovn_kubernetes_nodes_data {

if [[ ${OVNKUBE_MASTER} != "" ]] ; then
oc cp openshift-ovn-kubernetes/${OVNKUBE_MASTER}:/etc/openvswitch/ovnsb_db.db ${NETWORK_LOG_PATH}/${NODE}_ovnsb_db.db > ${NETWORK_LOG_PATH}/${NODE}_sbdb &
gzip ${NETWORK_LOG_PATH}/${NODE}_sbdb &
PIDS+=($!)

oc cp openshift-ovn-kubernetes/${OVNKUBE_MASTER}:/etc/openvswitch/ovnnb_db.db ${NETWORK_LOG_PATH}/${NODE}_ovnnb_db.db > ${NETWORK_LOG_PATH}/${NODE}_nbdb &
gzip ${NETWORK_LOG_PATH}/${NODE}_nbdb &
PIDS+=($!)
fi

done
}

Expand Down Expand Up @@ -123,11 +132,11 @@ function gather_kuryr_data {


if [ $# -eq 0 ]; then
echo "WARNING: Collecting network logs on ALL nodes in your cluster. This could take a long time." >&2
echo "WARNING: Collecting network logs on ALL linux nodes in your cluster. This could take a long time." >&2
fi

PIDS=()
NODES="${@:-$(oc get nodes --no-headers -o custom-columns=':metadata.name')}"
NODES="${@:-$(oc get nodes -o jsonpath='{range .items[*]}{@.metadata.name} {.status.nodeInfo.operatingSystem==linux}')}"
NETWORK_TYPE=$(oc get network.config.openshift.io -o=jsonpath='{.items[0].spec.networkType}' | tr '[:upper:]' '[:lower:]')
if [[ "${NETWORK_TYPE}" == "openshiftsdn" ]]; then
gather_openshiftsdn_nodes_data
Expand Down

0 comments on commit f6a03f4

Please sign in to comment.