From 83bce93b6d56069226a1a61b0805ba704cd2784a Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Wed, 28 Aug 2013 00:20:51 -0400 Subject: [PATCH 1/3] speed ups for hops.py, add parmeters to hops.py, generate diagrams of major AS-hops --- diagram.sh | 31 ++++++++++++ hops.py | 132 ++++++++++++++++++++++++++++++++++--------------- pretty_hops.sh | 21 +++----- stages.sh | 22 ++++++--- 4 files changed, 146 insertions(+), 60 deletions(-) create mode 100755 diagram.sh diff --git a/diagram.sh b/diagram.sh new file mode 100755 index 0000000..e1aa6b1 --- /dev/null +++ b/diagram.sh @@ -0,0 +1,31 @@ +#!/bin/bash + +export LC_ALL=C +cat <$as2 [ label=\"%0.2f\"];\n" "$rate" + done + +cat < output.png + diff --git a/hops.py b/hops.py index 5f658a0..802f7c0 100755 --- a/hops.py +++ b/hops.py @@ -1,6 +1,7 @@ #!/usr/bin/python import socket import struct +import sys # # from the web! @@ -8,20 +9,33 @@ def iptoint(ip): return int(socket.inet_aton(ip).encode('hex'),16) -def as_array(filename,skip=1): +def inttoip(ip): + return socket.inet_ntoa(hex(ip)[2:].zfill(8).decode('hex')) + +AS2NAME={} +def get_asno(as_raw): + as_long = as_raw.replace('"','').replace('&','').replace('-', ' ') + as_split = as_long.split(' ') + as_no = as_split[0].strip() + AS2NAME[as_no] = as_no.strip() + if len(as_split) > 1: + AS2NAME[as_no] = as_split[1].strip() + return as_no + +def as_array(filename,skip_header=True): ases = {} f = open(filename, 'r') - skipped = 0 + header_skipped = False counter = 0 for line in f: - if (skipped < skip): - skipped += 1 + if not header_skipped and skip_header: + header_skipped=True continue s = line.split(',') - as_no = (s[2].split(' '))[0].lstrip("\"") - low = s[0] - high = s[1] - ases[counter] = as_no + "," + low + "," + high + as_no = get_asno(s[2]) + low = int(s[0]) + high = int(s[1]) + ases[counter] = (as_no,low,high) counter += 1 f.close() return ases @@ -30,85 +44,123 @@ def lookup_as(ip, ases, cache): if (cache and ip in cache): return cache[ip] for as_and_range in ases: - s = ases[as_and_range].split(',') - low = int(s[1]) - high = int(s[2]) + (as_no,low,high) = ases[as_and_range] if (low <= ip and ip <= high): if cache: - cache[ip] = s[0] - return s[0] - return "NO AS" + cache[ip] = as_no + return as_no + # note: first-char as '[a-z]' make graphviz easier. + return "x"+inttoip(ip).replace(".", "") -def rate_array(filename, skip=1): +def rate_array(filename, skip_header=True): rate = {} f = open(filename, 'r') - skipped = 0 + header_skipped = False + total_rates = 0 for line in f: - if (skipped < skip): - skipped += 1 + if not header_skipped and skip_header: + header_skipped=True continue s = line.split(',') site = (s[1].rpartition('.'))[0] client = s[2] index = site + "," + client - bw = s[3] - rate[index] = bw + bw = float(s[3]) + if index not in rate: + rate[index] = [] + rate[index].append(bw) + total_rates += 1 + print "Found %s raw, client rates" % total_rates f.close() return rate -def hop_array(filename, rates, skip=1): +def hop_array(filename, rates, skip_header=True): hops = {} f = open(filename, 'r') - skipped = 0 + header_skipped = False + hop_rates = 0 + hop_count = 0 + hop_rates_saved = {} for line in f: - if (skipped < skip): - skipped += 1 + if not header_skipped and skip_header: + header_skipped=True continue s = line.split(',') site = (s[1].rpartition('.'))[0] client = s[2] - print "Another data point ..." - hop_a = s[3] - hop_b = s[4] + hop_a = iptoint(s[3]) + hop_b = iptoint(s[4]) + #print "Another data point %s -> %s" % (s[3], s[4]) rates_index = site + "," + client if (hop_a not in hops): hops[hop_a] = {} if (hop_b not in hops[hop_a]): - hops[hop_a][hop_b] = "" - hops[hop_a][hop_b] += rates[rates_index] + "," + hops[hop_a][hop_b] = [] + if (rates_index,hop_a,hop_b) not in hop_rates_saved: + # Save rates between all distinct pairs of rates_index,hop_a,hop_b + hop_rates_saved[(rates_index,hop_a,hop_b)] = True + hops[hop_a][hop_b] += rates[rates_index] + hop_rates += len(rates[rates_index]) + hop_count += 1 + print "Assigned %s rates to %s distinct hops" % (hop_rates, hop_count) f.close() return hops def asify_hop_array(hops, ases): as_hops = {} as_cache = {} + len_hop_a = len(hops) + i_progress = 0.0 + i_rates = 0 + hop_saved = {} + hop_count = 0 for hop_a in hops: + msg = "Finding primary, AS-Hop pairs ... %0.2f%%" % (100*i_progress/len_hop_a) + sys.stdout.write("\b"*len(msg)) + sys.stdout.write(msg) + sys.stdout.flush() for hop_b in hops[hop_a]: - print "Hop pair ..." - as_hop_a = lookup_as(iptoint(hop_a), ases, as_cache) - as_hop_b = lookup_as(iptoint(hop_b), ases, as_cache) + as_hop_a = lookup_as(hop_a, ases, as_cache) + as_hop_b = lookup_as(hop_b, ases, as_cache) if as_hop_a not in as_hops: as_hops[as_hop_a] = {} if as_hop_b not in as_hops[as_hop_a]: - as_hops[as_hop_a][as_hop_b] = "" - as_hops[as_hop_a][as_hop_b] += hops[hop_a][hop_b] + as_hops[as_hop_a][as_hop_b] = [] + if (hop_a,hop_b) not in hop_saved: + hop_saved[(hop_a,hop_b)] = True + as_hops[as_hop_a][as_hop_b] += hops[hop_a][hop_b] + i_rates += len(hops[hop_a][hop_b]) + hop_count += 1 + i_progress+=1.0 + print "\nFound %s rates in %s distinct AS hops" % (i_rates, hop_count) return as_hops def write_hop_array(filename, hops): f = open(filename, 'w') - for hop_a in hops.keys(): - for hop_b in hops[hop_a].keys(): + f.write("as1,as2,count,rate\n") + for ashop_a in hops.keys(): + for ashop_b in hops[ashop_a].keys(): # # The number of results reported is a # little off because of fence-post # issue with trailing , in the list # - f.write(hop_a + "," + hop_b + "," + str(len(hops[hop_a][hop_b].split(','))) + ":" + hops[hop_a][hop_b] + "\n") + cnt_test = len(hops[ashop_a][ashop_b]) + avg_test = 0 + if cnt_test > 0: + avg_test = sum(hops[ashop_a][ashop_b])/cnt_test + if ashop_a not in AS2NAME: AS2NAME[ashop_a] = ashop_a + if ashop_b not in AS2NAME: AS2NAME[ashop_b] = ashop_b + output = [ashop_a, AS2NAME[ashop_a], ashop_b, AS2NAME[ashop_b], str(cnt_test), str(avg_test) ] + f.write(",".join(output) + "\n") f.close() -rate = rate_array("cache/stage1.comcast.lga01.sql.csv") +isp = sys.argv[1] +site= sys.argv[2] + +rate = rate_array("cache/stage1.%s.%s.sql.csv" % (isp,site)) ases = as_array("GeoIPASNum2.csv", 0) -hops = hop_array("cache/stage3.comcast.lga01.sql.csv", rate) +hops = hop_array("cache/stage3.%s.%s.sql.csv" % (isp,site), rate) as_hops = asify_hop_array(hops, ases) -write_hop_array("cache/hops.csv", as_hops) +write_hop_array("cache/hops.%s.%s.csv" %(isp,site), as_hops) #print lookup_as(iptoint("8.8.8.8"), ases, None) diff --git a/pretty_hops.sh b/pretty_hops.sh index 16ebd8e..8879689 100755 --- a/pretty_hops.sh +++ b/pretty_hops.sh @@ -1,18 +1,13 @@ #!/bin/bash -awk --field-separator=, ' - function to_as_string(as_no) - { - if (as_no == "NO AS") - return as_no - "grep " as_no " GeoIPASNum2.csv | head -n1 | sed \"s/^.*\\(AS.*$\\)/\\1/\"" | getline output - return output; - } - { - if ($1 != $2) { - print to_as_string($1) "(" $1 ")" "->" to_as_string($2) "(" $2 ")"; - } -}' cache/hops.csv +export LC_ALL=C +cat cache/hops.$1.$2.csv | grep -v as1 | awk -F, '{print $1,$2,$3,$4,$5}' | \ + while read as1 AS1 as2 AS2 count ; do + if test "$as1" = "$as2" ; then continue ; fi + #AS1=`LC_ALL=C grep "$as1 " GeoIPASNum2.csv | head -n1 | awk -F, '{print $3}' | tr '"-' ' ' | awk '{print $2}' ` + #AS2=`LC_ALL=C grep "$as2 " GeoIPASNum2.csv | head -n1 | awk -F, '{print $3}' | tr '"-' ' ' | awk '{print $2}' ` + printf "%-10s -> %-10s %-4s %-15s -> %-15s\n" "$as1" "$as2" $count "$AS1" "$AS2" + done #"s/^.*\(AS[0-9]\+\).*$/\1/" diff --git a/stages.sh b/stages.sh index 06d1b33..89d4752 100755 --- a/stages.sh +++ b/stages.sh @@ -11,6 +11,7 @@ function find_command () { find_command wget find_command m4 +find_command dot IP2ASNFILE=GeoIPASNum2 @@ -157,19 +158,26 @@ function handle_stage3_query () { } +ISP=comcast # NDT server ip addrs -handle_stage1_query comcast stage1 lga01 "'74.63.50.19','74.63.50.32','74.63.50.47'" -#handle_stage1_query comcast stage1 lga02 "'38.106.70.147','38.106.70.160','38.106.70.173'" -#handle_stage1_query cablevision stage1 lga01 "'74.63.50.19','74.63.50.32','74.63.50.47'" -#handle_stage1_query cablevision stage1 lga02 "'38.106.70.147','38.106.70.160','38.106.70.173'" +handle_stage1_query $ISP stage1 lga01 "'74.63.50.19','74.63.50.32','74.63.50.47'" +handle_stage1_query $ISP stage1 lga02 "'38.106.70.147','38.106.70.160','38.106.70.173'" # NPAD (*not* NDT) server ip addrs -handle_stage2_query comcast stage2 lga01 "'74.63.50.10','74.63.50.23','74.63.50.43'" -#handle_stage2_query comcast stage2 lga02 "'38.106.70.146','38.106.70.151','38.106.70.172'" +handle_stage2_query $ISP stage2 lga01 "'74.63.50.10','74.63.50.23','74.63.50.43'" +handle_stage2_query $ISP stage2 lga02 "'38.106.70.146','38.106.70.151','38.106.70.172'" # NPAD (*not* NDT) server ip addrs -handle_stage3_query comcast stage3 lga01 "'74.63.50.10','74.63.50.23','74.63.50.43'" +handle_stage3_query $ISP stage3 lga01 "'74.63.50.10','74.63.50.23','74.63.50.43'" +handle_stage3_query $ISP stage3 lga02 "'38.106.70.146','38.106.70.151','38.106.70.172'" +./hops.py $ISP lga01 +./hops.py $ISP lga02 + +./diagram.sh $ISP lga01 > input/$ISP.lga01.gv +dot -Tpng input/$ISP.lga01.gv > graphs/$ISP.lga01.png +./diagram.sh $ISP lga02 > input/$ISP.lga02.gv +dot -Tpng input/$ISP.lga02.gv > graphs/$ISP.lga02.png #generate_ispquery warner #generate_ispquery rcn From 86fdca61d8a53270d4aa28c3105f7948b04cb6f3 Mon Sep 17 00:00:00 2001 From: Stephen Soltesz Date: Wed, 28 Aug 2013 08:10:31 -0400 Subject: [PATCH 2/3] use first-3 octets to identify unknowns --- hops.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hops.py b/hops.py index 802f7c0..3e3f2aa 100755 --- a/hops.py +++ b/hops.py @@ -50,7 +50,8 @@ def lookup_as(ip, ases, cache): cache[ip] = as_no return as_no # note: first-char as '[a-z]' make graphviz easier. - return "x"+inttoip(ip).replace(".", "") + # note: also only return first three octets to reduce number of 'unknowns' + return "x"+(inttoip(ip).rpartition('.'))[0].replace(".","") def rate_array(filename, skip_header=True): rate = {} From f2d424cc63e9f37bc9512d873f4f04e071e18818 Mon Sep 17 00:00:00 2001 From: "soltesz@opentechinstitute.org" Date: Wed, 28 Aug 2013 10:38:38 -0400 Subject: [PATCH 3/3] add manual IP ranges for some ASes --- diagram.sh | 4 ++-- hops.py | 14 ++++++++++++++ stages.sh | 3 +++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/diagram.sh b/diagram.sh index e1aa6b1..9888da4 100755 --- a/diagram.sh +++ b/diagram.sh @@ -17,12 +17,12 @@ cat $HOPSFILE | grep -v as1 | awk -F, '{print $1,$2}' | \ cat $HOPSFILE | grep -v as1 | awk -F, '{print $1,$2,$3,$4,$5,$6}' | \ while read as1 AS1 as2 AS2 count rate ; do if test "$as1" = "$as2" ; then continue ; fi - printf "$as1->$as2 [ label=\"%0.2f\"];\n" "$rate" + printf "$as1->$as2 [ label=\"%0.2f\"];\n" "$count" done cat <