#!/bin/bash
# find-retran-failures.sh begins on the previous line
#
# This script uses tshark to look through a packet trace file and find TCP
# connections that appear to have failed because of retransmission issues.
# Ideally the packet trace file should be collected on the host sending the
# packets. If the trace file is collected on the receiver or at some point
# between sender and receiver it is possible failures will be missed.
#
# For each failure identified output will consist of a line containing
#   1. the TCP stream number
#   2. the number of seconds between the last TCP segment in the file and the
#      last segment in the stream. A stream that ends with 3 retransmissions
#      0.5 seconds before the last TCP segment in the trace is less likely to
#      have failed than one that ends 200 seconds before the last TCP segment
#      since there is less time to receive the ACK. Anything less than 0.1
#      seconds is ignored.
#   3. The first IP address and the number of its unACKed bytes
#   4. The second IP address and the number of its unACKed bytes
# This is followed by the last 20 lines of the TCP stream with the columns
#   1. TCP stream number
#   2. time since start of trace file (relative time)
#   3. source IP address
#   4. source port
#   5. destination IP address
#   6. destination port
#   7. IP ID (this makes it easy to spot duplicated packets)
#   8. IP TTL (makes it easy to see which IP address is local)
#   9. TCP sequence number
#  10. TCP ACK number
#  11. TCP length
#  12. FIN flag (1 == FIN)
#  13. TCP reset flag (1 == RST)
#  14. tshark expert info
# And this is followed by a list of TCP streams where the only segments in
# the stream were SYN segments. Technically these are not TCP connections
# that have failed because of retransmissions since the connection was never
# completed.
# In addition, as the script is running it prints out the stream number that
# it is currently processing. You can look at the partial results by
# monitoring the file /tmp/find-retran-failures-3.
# Version 1.0 March 20, 2018
# Version identifier of this script (version number and release date).
FINDRETRANFAILURESVERSION="1.0_2018-03-20"
# from https://github.com/noahdavids/packet-analysis.git
# Copyright (C) 2017 Noah Davids
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation, version 3, https://www.gnu.org/licenses/gpl-3.0.html
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# Command line handling. Exactly one argument (FILE) or two arguments
# (FILE FILTER) are accepted; any other count prints the usage text and
# stops the script. On success FILE holds the trace file name and, when a
# second argument was given, FILTER holds the tshark display filter.
case $# in
  1 | 2)
    ;;
  *)
    echo "Usage:"
    echo " find-retran-failures.sh FILE [FILTER]"
    echo " FILE is the name of one file"
    echo " FILTER is a tshark display filter string"
    exit
    ;;
esac
FILE=$1
# FILTER is only assigned when it was actually supplied
if [ $# -eq 2 ]; then
  FILTER=$2
fi
# Figure out if we can use "-Y" as the display filter argument or we need
# "-R". Basically look at the help output and if it does not mention
# "-Y <display filter>" we fall back to "-R".
# The help text is captured once and matched with a shell pattern, so no
# grep/wc pipeline (and no deprecated egrep) is needed.
frf_tshark_help=$(tshark -h)
case $frf_tshark_help in
  *"-Y <display filter>"*)
    DASH="-Y"
    ;;
  *)
    DASH="-R"
    ;;
esac
unset frf_tshark_help
# make sure that we are starting with a clean slate: remove the five scratch
# files (/tmp/find-retran-failures-1 .. -5) a previous run may have left
for frf_n in 1 2 3 4 5; do
  rm -f "/tmp/find-retran-failures-$frf_n"
done
unset frf_n
# read all the TCP segments, don't be confused by ICMP. When a FILTER was
# given on the command line it is ANDed onto the base display filter.
frf_display_filter="tcp && not icmp"
if [ $# -eq 2 ]; then
  frf_display_filter="$frf_display_filter && ($FILTER)"
fi
# Use the display filter option that was detected earlier ($DASH, "-Y" or
# "-R") instead of a hard-coded "-Y"; fall back to "-Y" if DASH is not set.
# One line per segment is written with the columns described in the header
# comment of this script.
tshark -r "$FILE" "${DASH:--Y}" "$frf_display_filter" -T fields \
  -e tcp.stream -e frame.time_relative -e ip.src -e tcp.srcport \
  -e ip.dst -e tcp.dstport -e ip.id -e ip.ttl -e tcp.seq -e tcp.ack \
  -e tcp.len -e tcp.flags.fin -e tcp.flags.reset -e _ws.expert \
  > /tmp/find-retran-failures-1
# If no streams were found (the extraction file is missing or empty) report
# that and quit
if [ ! -s /tmp/find-retran-failures-1 ]; then
  echo "tshark did not find any TCP streams - exiting"
  exit
fi
# frf_last_time FILE
#   Print column 2 (relative time) of the last line of FILE.
frf_last_time() {
  awk '{ t = $2 } END { print t }' "$1"
}

# frf_last_stream FILE
#   Print the numerically largest value found in column 1 (TCP stream
#   number) of FILE; prints nothing for an empty file. A single pass is
#   enough, the file does not have to be sorted.
frf_last_stream() {
  awk 'NR == 1 || $1 + 0 > max { max = $1 + 0 } END { if (NR) print max }' "$1"
}

# record the timestamp of the last TCP segment. not exactly the last frame in
# the trace but close enough unless I want to read the trace file twice --
# which I don't want to do.
LASTTIME=$(frf_last_time /tmp/find-retran-failures-1)
# Also find the last TCP stream number
LASTSTREAM=$(frf_last_stream /tmp/find-retran-failures-1)
# frf_highest_expected_ack FILE IP
#   For the lines of FILE whose source address (column 3) is IP, print the
#   largest value of sequence number + TCP length + FIN flag
#   (columns 9 + 11 + 12). "%.0f" keeps large values in plain integer form.
frf_highest_expected_ack() {
  awk -v ip="$2" '$3 == ip { printf "%.0f\n", $9 + $11 + $12 }' "$1" \
    | sort -n | tail -n 1
}

# frf_highest_ack FILE IP
#   For the lines of FILE whose source address (column 3) is IP, print the
#   largest ACK number (column 10).
frf_highest_ack() {
  awk -v ip="$2" '$3 == ip { print $10 }' "$1" | sort -n | tail -n 1
}

# frf_analyze_stream STREAM
#   Examine the last 20 segments of TCP stream number STREAM.
#   Reads:   /tmp/find-retran-failures-1 (all segments) and $LASTTIME.
#   Writes:  /tmp/find-retran-failures-2 (last 20 segments of the stream),
#            /tmp/find-retran-failures-4 (source addresses of those segments),
#            /tmp/find-retran-failures-3 (report, appended when the stream
#            looks like a retransmission failure),
#            /tmp/find-retran-failures-5 (stream number, appended when the
#            examined segments are all SYN segments).
#   Returns: always 0.
frf_analyze_stream() {
  local stream=$1
  local segments=/tmp/find-retran-failures-1
  local last20=/tmp/find-retran-failures-2
  local sources=/tmp/find-retran-failures-4
  local rule="======================================================="
  local delta ip1 ip2 ip1ns ip2ns ip1a ip2a d1 d2

  # Select the lines of this stream (string comparison on column 1), rewrite
  # them with single blanks between the columns and keep the last 20.
  # 20 is arbitrary but I figure it is enough.
  awk -v s="$stream" '($1 "") == (s "") { $1 = $1; print }' "$segments" \
    | tail -n 20 > "$last20"

  # If there are at least 2 retransmissions or duplicate ACKs or out of order
  # packets (which could be a mis-identified retransmission) in the last 20
  # lines then we need to look more closely; otherwise we are done.
  # NOTE: the count must not be capped with "grep -m 1" -- that yields at
  # most 1 and the threshold could then never be reached.
  if [ "$(grep -c -E "retrans|out|Dup" "$last20")" -lt 2 ]; then
    return 0
  fi

  # Calculate the time difference between the last segment in the stream and
  # the last TCP segment in the trace. If the time is < 0.100 seconds there
  # is not enough time to conclude that no ACK will be received so skip the
  # stream. The comparison is done by awk so that a difference printed in
  # exponent notation (for example 1e-05) is still compared numerically.
  delta=$(awk -v lasttime="$LASTTIME" \
    '{ d = lasttime - $2 } END { print d }' "$last20")
  if awk -v d="$delta" 'BEGIN { exit !(d + 0 < 0.100) }'; then
    return 0
  fi

  # For each IP address, ip1 and ip2, calculate the largest expected ACK by
  # adding the sequence number to the tcp length plus 1 for a FIN and
  # sorting. This is the "ns" value. Then find the largest ACK from the other
  # IP address. If the difference is 0 we know that all data has been ACKed,
  # so no retransmission failure. Note that there is 1 failure scenario this
  # will not identify. If the sender is the remote host and it is not getting
  # the local host's ACKs we will see all data being ACKed so will not
  # recognize any problem.
  # The address list lives in /tmp like every other scratch file (it used to
  # be written to the current directory).
  awk '{ print $3 }' "$last20" | sort -u > "$sources"
  ip1=$(head -n 1 "$sources")
  ip2=$(tail -n 1 "$sources")
  ip1ns=$(frf_highest_expected_ack "$last20" "$ip1")
  ip2a=$(frf_highest_ack "$last20" "$ip2")
  ip2ns=$(frf_highest_expected_ack "$last20" "$ip2")
  ip1a=$(frf_highest_ack "$last20" "$ip1")
  d1=$((ip1ns - ip2a))
  d2=$((ip2ns - ip1a))

  # 0,0: everything has been ACKed.
  # 0,-1 and -1,0: one of the ACK numbers is 1 greater than the ns number and
  # the other difference is 0; what we probably have is a keep alive sequence
  # so again no retransmission failure.
  # NOTE(review): this test runs before the SYN-only test below, so a stream
  # of SYN segments whose differences are 0 is dropped here and never listed
  # as SYN-only -- confirm whether that ordering is intended.
  case "$d1,$d2" in
    0,0 | 0,-1 | -1,0)
      return 0
      ;;
  esac

  # at least 1 IP has sent a FIN and a RESET; probably just shutting down the
  # connection and not waiting for the other IP to send its FIN
  if grep -q -- "$ip1.*$ip2.*FIN" "$last20" \
    && grep -q -- "$ip1.*$ip2.*RST" "$last20"; then
    return 0
  fi
  if grep -q -- "$ip2.*$ip1.*FIN" "$last20" \
    && grep -q -- "$ip2.*$ip1.*RST" "$last20"; then
    return 0
  fi

  # the only segments are SYN segments. We probably have a failure to
  # establish a connection, not the same as a failure in the middle of a
  # connection. Still keep a record of the streams and list them at the end
  if ! grep -q -v SYN "$last20"; then
    echo "$stream" >> /tmp/find-retran-failures-5
    return 0
  fi

  # At this point we have what looks like a retransmission failure
  {
    echo "$rule"
    printf 'TCP Stream: %s   Ends %s seconds before trace ends   %s bytes unACKed:  %s   %s bytes UnACKed %s\n' \
      "$stream" "$delta" "$ip1" "$d1" "$ip2" "$d2"
    echo "$rule"
    cat "$last20"
    echo "$rule"
  } >> /tmp/find-retran-failures-3
  return 0
}

# Loop through the text file -- once for each stream number from 0 up to
# $LASTSTREAM -- printing the stream number currently being processed.
# An empty LASTSTREAM results in no iterations.
printf 'current stream:'
for x in $(seq 0 "${LASTSTREAM:--1}"); do
  printf '%s, ' "$x"
  frf_analyze_stream "$x"
done
# frf_print_failures
#   Dump the report file /tmp/find-retran-failures-3, or, when that file
#   does not exist, print a line saying that nothing was found.
frf_print_failures() {
  if [ -f /tmp/find-retran-failures-3 ]; then
    cat /tmp/find-retran-failures-3
  else
    echo No TCP connections appear to have failed due of retransmissions
  fi
}

# frf_print_syn_only
#   When /tmp/find-retran-failures-5 exists, print a banner followed by the
#   stream numbers stored in it (one per line in the file) as a single
#   comma separated line. Prints nothing when the file does not exist.
frf_print_syn_only() {
  if [ -f /tmp/find-retran-failures-5 ]; then
    echo
    echo
    echo =======================================================
    echo The following streams contain nothing but SYN segments
    echo =======================================================
    # join the lines with "," -- no separator after the last one
    awk '{ printf "%s%s", (NR > 1 ? "," : ""), $0 }' \
      /tmp/find-retran-failures-5
    echo
  fi
}

# OK we are done, echo a line to get a new line after the list of streams
# processed
echo
# and dump out the -3 file or indicate that nothing was found
frf_print_failures
# now list the Streams with only SYN segments
frf_print_syn_only
# find-retran-failures.sh ends here