In [1]:
import json
import os
import csv
import requests
import pickle
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm_notebook

In [2]:
# Load the parsed traceroute results; the top-level keys are the traceroute
# method names.
with open("./parse-traceroutes-output.json") as json_file:
    json_dict = json.load(json_file)

tr_methods = [method for method in json_dict]

tr_methods

['udp', 'icmp', 'tcp', 'paris', 'dublin', '0trace']

In [3]:
def get_last_successful_hop_data(hops):
    """Extract the hop number, lowest RTT, and corresponding IP from the last successful hop.

    Hops are scanned from the end of the list backwards. A hop counts as
    successful when at least one of its RTT entries is not '*', regardless of
    how many probes were sent.

    Args:
        hops: sequence of hop dicts with 'TTL', 'RTTs' and 'HopIPs' keys.
            Assumes unanswered probes are marked '*' at matching positions in
            both 'RTTs' and 'HopIPs' -- TODO confirm against the parser output.

    Returns:
        A (hop_num, best_ip, best_rtt) tuple for the last successful hop, or
        None when no hop has any answered probe (including an empty hop list).
    """
    for hop in reversed(hops):
        # Drop timed-out probes first; testing the filtered list handles any
        # probe count instead of only the 1- and 3-probe layouts.
        rtts = [float(str(t)) for t in hop['RTTs'] if str(t) != '*']
        if not rtts:
            continue

        ips = [ip for ip in hop['HopIPs'] if ip != '*']

        best_rtt = min(rtts)
        # Both lists were filtered the same way, so the index of the lowest
        # RTT also selects the IP that produced it.
        best_ip = ips[rtts.index(best_rtt)]
        hop_num = hop['TTL']

        return hop_num, best_ip, best_rtt

    return None


In [4]:
# Start with an empty IP -> ASN-set cache and replace it with the pickled one
# from an earlier run when that file is present.
# NOTE: pickle can execute code on load; only use a cache file written by this notebook.
asn_cache = {}
if os.path.exists("asn_cache.pkl"):
    with open("asn_cache.pkl", "rb") as cache_file:
        asn_cache = pickle.load(cache_file)

def check_asn_match(ip1, ip2, timeout=10):
    """Determine whether the traceroute reached the target network by comparing ASNs.

    The ASNs of each IP are taken from the module-level ``asn_cache`` when
    present; otherwise they are fetched from the RIPEstat ``network-info``
    endpoint and stored in the cache. Failed lookups are not cached.

    Args:
        ip1: first IP address (resource string passed to RIPEstat).
        ip2: second IP address.
        timeout: seconds to wait for the HTTP request before giving up.

    Returns:
        1 if the two IPs share at least one ASN, 0 if they share none, and -1
        if a lookup failed (connection error, timeout, non-200 status, or a
        response without the expected ``data.asns`` field).
    """
    asn_sets = []
    for ip in (ip1, ip2):
        if ip in asn_cache:
            asns = asn_cache[ip]
        else:
            try:
                # Without a timeout, one stalled connection would hang the whole run.
                r = requests.get(
                    'https://stat.ripe.net/data/network-info/data.json',
                    params={'resource': ip},
                    timeout=timeout,
                )
            except requests.RequestException:
                return -1
            if r.status_code != 200:
                return -1
            try:
                asns = set(r.json()['data']['asns'])
            except (ValueError, KeyError, TypeError):
                # Body was not JSON or lacked the expected structure.
                return -1
            asn_cache[ip] = asns
        asn_sets.append(asns)

    return 1 if asn_sets[0].intersection(asn_sets[1]) else 0
        

In [5]:
# Build results[method][destination IP] -> summary of the last responsive hop
# and whether that hop shares an ASN with the destination.
results = {}

for tr in tr_methods:
    print(f'Processing {tr}...')
    results[tr] = {}

    for tr_dict in tqdm_notebook(json_dict[tr], desc='IPs'):
        dest_ip = tr_dict['DestinationIP']
        if dest_ip in results[tr]:
            # Only the first traceroute seen for each destination is kept.
            continue

        hop_data = get_last_successful_hop_data(tr_dict['Hops'])
        if hop_data is None:
            # No hop answered any probe: there is nothing to look up, so
            # record the destination as not reached instead of crashing on
            # the tuple unpacking.
            last_hop = last_ip = last_rtt = None
            target_reached = 0
        else:
            last_hop, last_ip, last_rtt = hop_data
            target_reached = check_asn_match(dest_ip, last_ip)

        results[tr][dest_ip] = {
            'last_hop': last_hop,
            'last_ip': last_ip,
            'last_rtt': last_rtt,
            'target_reached': target_reached,
        }


Processing udp...


IPs:   0%|          | 0/985 [00:00<?, ?it/s]

Processing icmp...


IPs:   0%|          | 0/985 [00:00<?, ?it/s]

Processing tcp...


IPs:   0%|          | 0/985 [00:00<?, ?it/s]

Processing paris...


IPs:   0%|          | 0/985 [00:00<?, ?it/s]

Processing dublin...


IPs:   0%|          | 0/2955 [00:00<?, ?it/s]

Processing 0trace...


IPs:   0%|          | 0/323 [00:00<?, ?it/s]

In [6]:
# Always rewrite the cache file: asn_cache starts from the previously saved
# contents, so this keeps the old entries and adds any lookups made in this
# run. Writing only when the file was missing would never persist new entries.
with open("asn_cache.pkl", "wb") as f:
    pickle.dump(asn_cache, f)

In [7]:
# Report how many distinct destination IPs were recorded for each method.
for method, per_ip in results.items():
    count = len(per_ip)
    print(f'{method:8}: {count} data points')

udp     : 985 data points
icmp    : 985 data points
tcp     : 985 data points
paris   : 985 data points
dublin  : 975 data points
0trace  : 323 data points


In [8]:
# Export every method except 0trace, one row per (destination IP, method).
csv_methods = [tr for tr in tr_methods if tr != "0trace"]

# Walk dublin's destinations in their recorded order, keeping only those that
# every exported method has a result for, so the row lookup cannot raise KeyError.
ips = [ip for ip in results['dublin'] if all(ip in results[tr] for tr in csv_methods)]

with open('data_no_0trace.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    # NOTE(review): 'Detination IP' is misspelt but kept as-is, since readers
    # of this file may select the column by that exact name.
    writer.writerow(['Detination IP', 'Type', 'Last Hop #', 'Last IP', 'Last RTT', 'Target Reached'])
    for ip in ips:
        for tr in csv_methods:
            row = results[tr][ip]
            writer.writerow([ip, tr, row['last_hop'], row['last_ip'], row['last_rtt'], row['target_reached']])


In [9]:
# Export all methods, restricted to destinations that every method (including
# 0trace) has a result for. A list in dublin's recorded order is used instead
# of a set so the CSV row order is the same on every run.
ips = [ip for ip in results["dublin"] if all(ip in results[tr] for tr in tr_methods)]
print(f"{len(ips)} data points")

with open('data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    # NOTE(review): 'Detination IP' is misspelt but kept as-is, since readers
    # of this file may select the column by that exact name.
    writer.writerow(['Detination IP', 'Type', 'Last Hop #', 'Last IP', 'Last RTT', 'Target Reached'])
    for ip in ips:
        for tr in tr_methods:
            row = results[tr][ip]
            writer.writerow([ip, tr, row['last_hop'], row['last_ip'], row['last_rtt'], row['target_reached']])


213 data points
