In [None]:
import subprocess, shlex, time, re, socket, os, json, glob
from datetime import datetime
from urllib.parse import urlparse
from collections import defaultdict, Counter
import networkx as nx
import matplotlib.pyplot as plt
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading

### Part 1: Run traceroute and save the results to a JSON file

In [None]:
# --- Traceroute invocation ---------------------------------------------------
# -q 1: one probe per hop, -w 5: wait up to 5 s for each reply, -m 64: max hops.
TRACEROUTE_CMD = "traceroute -q 1 -w 5 -m 64"
TIMEOUT_PER_TRACEROUTE = 180  # seconds allowed for a single traceroute run
MAX_THREADS = 15              # upper bound on traceroutes running in parallel

# --- Provenance and output locations -----------------------------------------
SOURCE_LOCATION = "HAMMAN"    # label stored with every saved result file
RESULTS_FOLDER = "traceroute_results"
VISUALIZATIONS_FOLDER = "visualizations"

# Create both output folders up front; existing folders are left untouched.
for _folder in (RESULTS_FOLDER, VISUALIZATIONS_FOLDER):
	os.makedirs(_folder, exist_ok=True)

# --- Targets -----------------------------------------------------------------
# Hostnames (or URLs) to trace.
destinations = [
	"aut.ac.ir",
	"www.wsj.com",
	"www.coolmathgames.com",
	"asce.rice.edu",
	"www.pokemon.co.jp",
	"www.nytimes.com",
	"www.iitb.ac.in",
	"umich.edu",
	"www.taobao.com",
	"www.universalorlando.com",
	"fried.rice.edu",
	"portal.ehawaii.gov",
	"yu-gi-oh.jp",
]

In [None]:
def url_to_host(s: str) -> str:
	"""
	Reduce a destination string to the part before the first '/'.

	Surrounding whitespace is stripped first. Strings containing "://" are
	parsed as URLs and their network location (or, if that is empty, their
	path) is used; anything else is treated as "host[/path]". A blank input
	yields an empty string.
	"""
	candidate = s.strip()
	if candidate and "://" in candidate:
		parts = urlparse(candidate)
		candidate = parts.netloc if parts.netloc else parts.path
	# Everything up to the first slash; an empty string stays empty.
	return candidate.partition('/')[0]

def run_traceroute(host: str, timeout: int = 300):
	"""
	Run the command configured in TRACEROUTE_CMD against `host`.

	Args:
		host: destination hostname or address, passed as the last argument.
		timeout: maximum number of seconds to let the command run.

	Returns:
		A 3-tuple (ok, elapsed_seconds, output_text):
		  * ok is True only when the command finished before the timeout
		    and exited with status 0.
		  * elapsed_seconds is how long the attempt took.
		  * output_text is the captured stdout. On timeout it is whatever
		    was captured so far followed by a "[TIMEOUT AFTER ...]" marker;
		    on any other failure it ends with an "[ERROR: ...]" marker.

	This function does not raise; every failure is reported through `ok`.
	"""
	# Use an argv list rather than a shell string: subprocess.run then starts
	# (and, on timeout, kills) traceroute directly instead of a shell, and a
	# missing binary surfaces as an exception rather than a shell exit code.
	argv = shlex.split(TRACEROUTE_CMD) + [host]
	# monotonic() is unaffected by wall-clock adjustments during long runs.
	t0 = time.monotonic()

	try:
		result = subprocess.run(
			argv,
			capture_output=True,
			text=True,
			timeout=timeout
		)
	except subprocess.TimeoutExpired as e:
		elapsed = time.monotonic() - t0
		# The partial output attached to the exception may be bytes or str
		# (or None); normalise it instead of calling .decode() blindly.
		partial_output = e.stdout or ""
		if isinstance(partial_output, bytes):
			partial_output = partial_output.decode('utf-8', errors='replace')
		return False, elapsed, partial_output + "\n[TIMEOUT AFTER {:.1f}s]\n".format(elapsed)
	except Exception as e:
		elapsed = time.monotonic() - t0
		return False, elapsed, f"[ERROR: {e}]\n"

	elapsed = time.monotonic() - t0
	output = result.stdout or ""

	if result.returncode != 0:
		# Keep any hops that were printed, and record why the command failed
		# on a single marker line so it cannot be mistaken for a hop line.
		detail = " ".join((result.stderr or "").split())
		reason = f"traceroute exited with status {result.returncode}"
		if detail:
			reason += f": {detail}"
		return False, elapsed, output + f"\n[ERROR: {reason}]\n"

	return True, elapsed, output

def run_traceroute_with_progress(host: str, timeout: int = 300):
	"""
	Run `run_traceroute` for `host`, printing a line before and after.

	Returns (host, status, elapsed_seconds, output_text), i.e. the result of
	`run_traceroute` with the host prepended so callers can tell results apart.
	"""
	print(f"Starting traceroute to {host}...")
	succeeded, duration, text = run_traceroute(host, timeout)
	# Printed for every outcome, including timeouts and errors.
	print(f"Completed traceroute to {host} in {duration:.2f}s")
	return (host, succeeded, duration, text)

# A hop line starts with the hop number, followed by the rest of the line.
hop_line_re = re.compile(r"^\s*(\d+)\s+(.+)$")

def parse_traceroute(text: str):
	"""
	Turn raw traceroute output into a list of per-hop records.

	Only lines that begin with a hop number are considered; all other lines
	are skipped. Each record is a dict
	{'hop': int, 'host': str|None, 'ip': str|None, 'rtt_ms': float|None}.

	A hop whose line contains "*" is recorded with host, ip and rtt_ms all
	None. When a line carries only an IP address, a reverse DNS lookup is
	attempted to fill in 'host'. Only the first match on a line is used
	(the command is expected to run with one probe per hop).
	"""
	records = []
	for raw_line in text.splitlines():
		numbered = hop_line_re.match(raw_line)
		if numbered is None:
			continue

		entry = {'hop': int(numbered.group(1)), 'host': None, 'ip': None, 'rtt_ms': None}
		payload = numbered.group(2)

		# No reply for this hop: keep the all-None entry.
		if "*" in payload:
			records.append(entry)
			continue

		# Try the three layouts from most to least specific; stop at the first hit.
		named = re.search(r"([^\s]+)\s+\((\d+\.\d+\.\d+\.\d+)\)\s+([\d\.]+)\s+ms", payload)
		addr_only = None if named else re.search(r"(\d+\.\d+\.\d+\.\d+)\s+([\d\.]+)\s+ms", payload)
		name_only = None if (named or addr_only) else re.search(r"([^\s]+)\s+([\d\.]+)\s+ms", payload)

		if named:
			# hostname (ip)  12.345 ms
			entry['host'] = named.group(1)
			entry['ip'] = named.group(2)
			entry['rtt_ms'] = float(named.group(3))
		elif addr_only:
			# ip  12.345 ms
			entry['ip'] = addr_only.group(1)
			entry['rtt_ms'] = float(addr_only.group(2))
			# Best-effort reverse lookup; any failure just leaves host unset.
			try:
				entry['host'] = socket.gethostbyaddr(entry['ip'])[0]
			except Exception:
				entry['host'] = None
		elif name_only:
			# hostname  12.345 ms
			entry['host'] = name_only.group(1)
			entry['rtt_ms'] = float(name_only.group(2))

		records.append(entry)

	return records

In [None]:
# Convert URLs to hostnames
hosts = [url_to_host(dest) for dest in destinations]

# Initialize storage (rebuilt on every run, so re-running this cell starts clean)
route_map = {}     # host -> hops list
timings = {}       # host -> elapsed seconds
raw_outputs = {}   # host -> raw text
statuses = {}      # host -> success status (True/False)

print("Starting parallel traceroute execution...")
start_time = time.time()

# One worker per host, capped at MAX_THREADS. ThreadPoolExecutor rejects
# max_workers=0, so keep at least one worker even if `hosts` is empty.
worker_count = max(1, min(len(hosts), int(MAX_THREADS)))

# Run traceroutes in parallel
with ThreadPoolExecutor(max_workers=worker_count) as executor:
	# Submit all tasks
	future_to_host = {
		executor.submit(run_traceroute_with_progress, host, TIMEOUT_PER_TRACEROUTE): host
		for host in hosts
	}

	# Process completed tasks as they finish
	for future in as_completed(future_to_host):
		try:
			host, status, elapsed, output = future.result()

			# Store results
			route_map[host] = parse_traceroute(output)
			timings[host] = elapsed
			raw_outputs[host] = output
			statuses[host] = status

			print(f"\n=== Results for {host} ===")
			print(f"Elapsed: {elapsed:.2f}s")
			print("Raw output first lines:")
			print("\n".join(output.splitlines()[:10]))  # Print first 10 lines

		except Exception as exc:
			# The worker (or the parsing above) raised; record a failed entry
			# under the host the future was submitted for.
			original_host = future_to_host[future]
			print(f"Traceroute to {original_host} generated an exception: {exc}")
			# Store error results
			timings[original_host] = 0
			raw_outputs[original_host] = f"[EXCEPTION: {exc}]\n"
			route_map[original_host] = []
			statuses[original_host] = False

total_elapsed = time.time() - start_time

print("\n=== SUMMARY ===")
print(f"Total execution time: {total_elapsed:.2f}s")
print(f"Processed {len(hosts)} hosts in parallel")
print("\nIndividual timings:")
for host, elapsed in sorted(timings.items(), key=lambda x: x[1], reverse=True):
	# A trace counts as completed only when the run reported success AND hops
	# were parsed: a timed-out run can still leave partial hops in route_map,
	# and a run can report success without producing any hop lines.
	status = "COMPLETED" if statuses.get(host) and route_map.get(host) else "FAILED/TIMEOUT"
	print(f"  {host:30s}  {elapsed:7.2f}s  [{status}]")

# Same success criterion as the per-host labels above.
successful_traces = sum(1 for traced in route_map if statuses.get(traced) and route_map[traced])
print(f"\nSuccessful traces: {successful_traces}/{len(hosts)}")

In [None]:
# One line per destination: host, number of parsed hops, success flag.
for h in route_map:
	hops = route_map[h]
	print(f"{h}, {len(hops)}, {statuses[h]}")

In [None]:
def save_traceroute_results(route_map, timings, raw_outputs, statuses, source_ip):
	"""
	Write one traceroute run to a timestamped JSON file and return its path.

	The file is created inside RESULTS_FOLDER and named
	traceroute_<source_ip>_<YYYYmmdd_HHMMSS>.json. It stores the four result
	mappings as given, plus the source IP, SOURCE_LOCATION, the timestamp and
	the list of destinations (the keys of `route_map`).
	"""
	stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
	out_path = os.path.join(RESULTS_FOLDER, f"traceroute_{source_ip}_{stamp}.json")

	# Keyword order here is the key order in the written JSON document.
	payload = dict(
		source_ip=source_ip,
		source_location=SOURCE_LOCATION,
		timestamp=stamp,
		destinations=list(route_map.keys()),
		route_map=route_map,
		timings=timings,
		raw_outputs=raw_outputs,
		statuses=statuses,
	)

	with open(out_path, 'w') as handle:
		json.dump(payload, handle, indent=2)

	print(f"Results saved to: {out_path}")
	return out_path

In [None]:
# Resolve this machine's address for the result file name and metadata.
# Resolving the local hostname can fail (socket.gaierror is an OSError); fall
# back to a placeholder so the collected results are still written to disk.
# NOTE(review): the resolved address may be a loopback address on some
# machines - confirm it is the address you want recorded as the source.
try:
	source_ip = socket.gethostbyname(socket.gethostname())
except OSError:
	source_ip = "unknown"

save_traceroute_results(route_map, timings, raw_outputs, statuses, source_ip)