In [None]:
import subprocess, shlex, time, re, socket, os, json, glob
import ipaddress
import threading
from datetime import datetime
from urllib.parse import urlparse
from collections import defaultdict, Counter
from concurrent.futures import ThreadPoolExecutor, as_completed

import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Shell command prefix for traceroute. Per traceroute(8): -q 1 sends one probe
# per hop, -w 5 waits up to 5 s for each reply, -m 64 caps the path at 64 hops.
# NOTE(review): no cell visible in this section executes it; presumably it is
# consumed by the measurement part of the notebook - confirm.
TRACEROUTE_CMD = "traceroute -q 1 -w 5 -m 64"
# Folder scanned for traceroute_*.json files (default argument of load_all_results).
RESULTS_FOLDER = "traceroute_results"
# Folder the per-source graph PDFs are saved into by the analysis loop.
VISUALIZATIONS_FOLDER = "visualizations"
# Label of the vantage point; default `source_location` of build_individual_graph.
SOURCE_LOCATION = "Ryon_111"

# Create both folders up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(RESULTS_FOLDER, exist_ok=True)
os.makedirs(VISUALIZATIONS_FOLDER, exist_ok=True)

# NOTE(review): the two limits below are not referenced by any cell visible in
# this section; presumably they bound the measurement run - confirm.
TIMEOUT_PER_TRACEROUTE = 180  # seconds
MAX_THREADS = 15          # maximum number of parallel threads

# Hostnames to trace. NOTE(review): the analysis cells take their destinations
# from the loaded result files, not from this list - confirm it is only used
# when collecting measurements.
destinations = [
	"aut.ac.ir",
	"www.wsj.com",
	"www.coolmathgames.com",
	"asce.rice.edu",
	"www.pokemon.co.jp",
	"www.nytimes.com",
	"www.iitb.ac.in",
	"umich.edu",
	"www.taobao.com",
	"www.universalorlando.com",
	"fried.rice.edu",
	"portal.ehawaii.gov",
	"yu-gi-oh.jp"
]

### Part 2: Load the results collected from all source IPs, then analyze and visualize them

In [None]:
def load_all_results(folder=RESULTS_FOLDER):
	"""Load every ``traceroute_*.json`` result file found in ``folder``.

	Files that cannot be read or parsed, or whose top-level JSON value is not
	an object, are reported on stdout and skipped; they never abort the load.

	Args:
		folder: Directory to scan. Defaults to ``RESULTS_FOLDER``.

	Returns:
		list: One parsed dict per successfully loaded file, in sorted filename
		order. Empty when the folder is missing or nothing could be loaded.
	"""
	if not os.path.exists(folder):
		print(f"Results folder {folder} not found!")
		return []

	# glob yields files in filesystem-dependent order; sort so the results
	# come back in a reproducible order from run to run.
	json_files = sorted(glob.glob(os.path.join(folder, "traceroute_*.json")))
	results = []

	for filepath in json_files:
		try:
			with open(filepath, 'r') as f:
				data = json.load(f)
		except (OSError, ValueError) as e:
			# ValueError covers json.JSONDecodeError and UnicodeDecodeError.
			print(f"Error loading {filepath}: {e}")
			continue

		if not isinstance(data, dict):
			print(f"Error loading {filepath}: expected a JSON object, got {type(data).__name__}")
			continue

		results.append(data)
		# Use .get so a file without 'source_ip' is not reported as a load
		# error after it has already been added to the results.
		print(f"Loaded: {os.path.basename(filepath)} (source: {data.get('source_ip', 'unknown')})")

	print(f"Loaded {len(results)} result files")
	return results

def filter_none_hops(hops):
	"""Return the hops that carry at least one piece of real data.

	A hop is kept when its 'host' or 'ip' is truthy, or when its 'rtt_ms' is
	anything other than None. The relative order of the hops is preserved.
	"""
	def has_data(entry):
		# Host and IP count only when non-empty; an RTT of 0 still counts.
		if entry.get('host'):
			return True
		if entry.get('ip'):
			return True
		return entry.get('rtt_ms') is not None

	return [entry for entry in hops if has_data(entry)]

def filter_hop_map(hop_map):
	"""Apply ``filter_none_hops`` to every route of ``hop_map``.

	Routes left with no hops after filtering are dropped from the returned
	dict; the input mapping itself is not modified.
	"""
	cleaned = ((target, filter_none_hops(route)) for target, route in hop_map.items())
	return {target: kept for target, kept in cleaned if kept}

In [None]:
def node_id(h):
	"""Return the graph key for hop ``h``.

	Preference order: the hop's IP, then its hostname, then a placeholder of
	the form ``hop<N>_unknown`` built from the hop number.
	"""
	for field in ('ip', 'host'):
		value = h.get(field)
		if value:
			return value
	return f"hop{h['hop']}_unknown"

def node_label(h):
	"""Return the text to display for hop ``h``.

	Hostname and IP on two lines when both are known, whichever one is known
	otherwise, and "*" when neither is.
	"""
	host = h.get('host')
	ip = h.get('ip')
	if host and ip:
		return f"{host}\n{ip}"
	return ip or host or "*"

def classify_node_individual(nid, route_map, source_ip):
	"""Classify a node for the per-source graph visualization.

	Checks are applied in this order and the first match wins:

	1. 'source'      - ``nid`` equals ``source_ip``.
	2. 'destination' - ``nid`` equals, or is a substring of, a key of ``route_map``.
	3. 'unknown'     - ``nid`` is "*" or a ``hop<N>_unknown`` placeholder.
	4. 'multi_route' - ``nid`` appears on more than one route.
	5. 'ip_only'     - ``nid`` parses as an IPv4 or IPv6 address.
	6. 'hostname'    - anything else.

	Args:
		nid: Node identifier string as produced by ``node_id``.
		route_map: Mapping of destination name -> list of hop dicts.
		source_ip: Identifier of the source node.

	Returns:
		str: One of the six category names listed above.
	"""
	if nid == source_ip:
		return 'source'

	# Substring containment (which also covers plain equality) is the matching
	# rule this function has always used for destinations.
	if any(nid in dest for dest in route_map):
		return 'destination'

	# Placeholders contain no dot, so they must be recognised before the
	# hostname/IP test or they would be reported as hostnames.
	if nid == "*" or (nid.startswith("hop") and nid.endswith("_unknown")):
		return 'unknown'

	# Count each route at most once, however often the node repeats on it.
	route_count = sum(
		1 for hops in route_map.values()
		if any(node_id(hop) == nid for hop in hops)
	)
	if route_count > 1:
		return 'multi_route'

	# Parse instead of counting dots: a four-label hostname such as
	# "www.pokemon.co.jp" has three dots too, and IPv6 addresses have none.
	try:
		ipaddress.ip_address(nid)
	except ValueError:
		return 'hostname'
	return 'ip_only'

def build_individual_graph(route_map, source_ip, route_status, source_location=SOURCE_LOCATION):
	"""Build the undirected, unweighted topology graph for one source.

	Every node carries a 'label' and a 'node_type' attribute. Each route is
	chained hop by hop starting from the source node. A destination node is
	always created, but it is linked to its route only when ``route_status``
	marks that destination as reached.

	Args:
		route_map: Mapping of destination name -> list of hop dicts.
		source_ip: Identifier used for the source node.
		route_status: Mapping of destination name -> truthy if it was reached.
		source_location: Text appended to the source node's label.

	Returns:
		nx.Graph: The assembled graph.
	"""
	graph = nx.Graph()
	graph.add_node(source_ip, label=f"{source_ip}({source_location})", node_type='source')

	for dest, hops in route_map.items():
		reached = route_status.get(dest, False)

		# The destination node exists even when the trace never got there.
		graph.add_node(dest, label=dest, node_type='destination')

		# On a reached route the final hop is left out so it is not drawn a
		# second time next to the destination node.
		intermediate = hops[:-1] if reached else hops

		tail = source_ip  # last node placed on this route so far
		for hop in intermediate:
			nid = node_id(hop)

			# Placeholder ids stand for hops without data; leave them out.
			if nid.startswith("hop") and "unknown" in nid:
				continue

			if nid not in graph:
				graph.add_node(
					nid,
					label=nid,
					node_type=classify_node_individual(nid, route_map, source_ip),
				)

			# No self-loops when consecutive hops resolve to the same node.
			if tail != nid:
				graph.add_edge(tail, nid)

			tail = nid

		# Close the path on the destination only for routes that reached it.
		if reached and hops and tail != dest:
			graph.add_edge(tail, dest)

	return graph

def get_node_visual_props(node_type):
	"""Return ``{'size': ..., 'color': ...}`` used to draw nodes of ``node_type``.

	Node types not listed below fall back to size 200 and color 'gray'.
	"""
	palette = {
		'source': (800, 'red'),
		'destination': (800, 'blue'),
		'multi_route': (400, 'orange'),
		'hostname': (200, 'green'),
		'ip_only': (100, 'gray'),
		'unknown': (100, 'lightgray'),
	}
	size, color = palette.get(node_type, (200, 'gray'))
	return {'size': size, 'color': color}

def graph_descriptors(G: nx.Graph):
	"""Compute summary metrics for the graph ``G``.

	Returns:
		dict: node, link and component counts, density, average degree, a
		meshedness estimate clamped to [0, 1], global efficiency, average
		clustering and the cyclomatic number (E - V + C). The metrics
		delegated to networkx are reported as 0.0 for graphs with fewer
		than two nodes.
	"""
	n_nodes = G.number_of_nodes()
	n_edges = G.number_of_edges()
	n_components = len(list(nx.connected_components(G)))

	# Number of independent cycles: E - V + C.
	cyclomatic = n_edges - n_nodes + n_components

	if n_nodes > 1:
		density = nx.density(G)
		efficiency = nx.global_efficiency(G)
		clustering = nx.average_clustering(G)
	else:
		density = efficiency = clustering = 0.0

	avg_degree = (2*n_edges / n_nodes) if n_nodes > 0 else 0.0

	# Cycle count relative to 2V - 5, only evaluated from three nodes upward.
	meshedness = 0.0
	if n_nodes >= 3:
		denom = max(1, 2*n_nodes - 5)
		meshedness = max(0.0, min(1.0, cyclomatic / denom))

	return {
		'nodes': n_nodes,
		'links': n_edges,
		'components': n_components,
		'density': density,
		'avg_degree': avg_degree,
		'meshedness_est': meshedness,
		'efficiency': efficiency,
		'avg_clustering': clustering,
		'cyclomatic_number': cyclomatic,
	}

# def visualize_individual_graph(G, source_ip, result_info, figsize=(16,12), save_path=None, is_show=True):
# 	"""Visualize individual graph"""
# 	if G.number_of_nodes() == 0:
# 		print(f"No nodes to visualize for {source_ip}!")
# 		return

# 	plt.figure(figsize=figsize)

# 	# Calculate layout
# 	pos = nx.spring_layout(G, seed=42, k=1.0, iterations=50)
	
# 	# Separate nodes by type for different visual properties
# 	node_types_viz = {}
# 	for node, data in G.nodes(data=True):
# 		node_type = data.get('node_type', 'unknown')
# 		if node_type not in node_types_viz:
# 			node_types_viz[node_type] = []
# 		node_types_viz[node_type].append(node)
	
# 	# Draw nodes by type
# 	for node_type, nodes in node_types_viz.items():
# 		props = get_node_visual_props(node_type)
# 		nx.draw_networkx_nodes(G, pos, nodelist=nodes, 
# 							 node_size=props['size'], 
# 							 node_color=props['color'],
# 							 alpha=0.8,
# 							 label=f"{node_type} ({len(nodes)})")
	
# 	# Draw edges
# 	nx.draw_networkx_edges(G, pos, width=1.0, alpha=0.6, edge_color='gray')
	
# 	# Draw labels
# 	labels = nx.get_node_attributes(G, 'label')
# 	nx.draw_networkx_labels(G, pos, labels=labels, font_size=9, font_weight='bold')
	
# 	plt.axis('off')
# 	plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', labelspacing=2.5)
# 	plt.title(f'Network Topology from Source: {source_ip}\n'
# 			  f'Timestamp: {result_info["timestamp"]} | '
# 			  f'{G.number_of_nodes()} nodes, {G.number_of_edges()} edges', 
# 			  fontsize=14, pad=20)
# 	plt.tight_layout()
# 	if save_path:
# 		plt.savefig(save_path, dpi=400)
# 	if is_show:
# 		plt.show()

In [None]:
def visualize_individual_graph(G, source_ip, result_info, figsize=(16,12), save_path=None, is_show=True):
    """Draw one source's topology graph with the source at the bottom.

    Nodes are drawn per 'node_type' with the size and color returned by
    ``get_node_visual_props`` and are positioned by
    ``create_hierarchical_layout``.

    Parameters:
    -----------
    G : nx.Graph
        Graph whose nodes carry 'label' and 'node_type' attributes.
    source_ip : str
        Source node identifier, used for the layout and the title.
    result_info : dict
        Loaded result; its 'timestamp' entry is shown in the title.
    figsize : tuple
        Figure size in inches.
    save_path : str or None
        When set, the figure is written to this path at 400 dpi.
    is_show : bool
        When True the figure is displayed. When False it is closed before
        returning, so batch runs do not accumulate open figures.

    Returns:
    --------
    None
    """
    if G.number_of_nodes() == 0:
        print(f"No nodes to visualize for {source_ip}!")
        return

    fig = plt.figure(figsize=figsize)
    try:
        # Custom layout: source at the bottom, destinations at the top.
        pos = create_hierarchical_layout(G, source_ip)

        # Group nodes by type so each group gets its own style and legend entry.
        node_types_viz = {}
        for node, data in G.nodes(data=True):
            node_types_viz.setdefault(data.get('node_type', 'unknown'), []).append(node)

        for node_type, nodes in node_types_viz.items():
            props = get_node_visual_props(node_type)
            nx.draw_networkx_nodes(G, pos, nodelist=nodes,
                                   node_size=props['size'],
                                   node_color=props['color'],
                                   alpha=0.8,
                                   label=f"{node_type} ({len(nodes)})")

        nx.draw_networkx_edges(G, pos, width=1.0, alpha=0.6, edge_color='gray')

        labels = nx.get_node_attributes(G, 'label')
        nx.draw_networkx_labels(G, pos, labels=labels, font_size=9, font_weight='bold')

        plt.axis('off')
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', labelspacing=2.5)
        plt.title(f'Network Topology from Source: {source_ip}\n'
                  f'Timestamp: {result_info["timestamp"]} | '
                  f'{G.number_of_nodes()} nodes, {G.number_of_edges()} edges',
                  fontsize=14, pad=20)
        plt.tight_layout()
        if save_path:
            plt.savefig(save_path, dpi=400)
    except Exception:
        # Do not leave a half-drawn figure registered with pyplot.
        plt.close(fig)
        raise

    if is_show:
        plt.show()
    else:
        # pyplot keeps every figure alive until it is closed explicitly; when
        # called in a loop with is_show=False they would pile up in memory.
        plt.close(fig)

def create_hierarchical_layout(G, source_ip):
    """
    Create a hierarchical layout with source at bottom and destinations at top.

    The root is the first node whose 'node_type' is 'source'; if there is
    none, ``source_ip`` is used when it is a node of ``G``. Every other
    non-destination node is placed on the row matching its hop distance from
    the root. Nodes the root cannot reach share one extra row above the
    deepest reachable one. All destination nodes share a single row above
    everything else. Each row is centred horizontally on x = 0, and every
    node of ``G`` receives a position.

    Parameters:
    -----------
    G : nx.Graph
        NetworkX graph whose nodes carry a 'node_type' attribute
    source_ip : str
        Source node identifier (fallback root when no node is typed 'source')

    Returns:
    --------
    dict
        Position dictionary with (x, y) coordinates for each node
    """
    y_spacing = 2.0  # Vertical spacing between levels
    x_spacing = 1.5  # Horizontal spacing between nodes

    def centered_xs(count):
        """Return ``count`` x-coordinates spaced by x_spacing and centred on 0."""
        half_width = (count - 1) * x_spacing / 2
        return [i * x_spacing - half_width for i in range(count)]

    # Identify the root and the destination nodes.
    source_nodes = []
    destination_nodes = []
    for node, data in G.nodes(data=True):
        node_type = data.get('node_type', 'unknown')
        if node_type == 'source':
            source_nodes.append(node)
        elif node_type == 'destination':
            destination_nodes.append(node)

    if source_nodes:
        root = source_nodes[0]  # Should be only one source
    elif source_ip in G:
        root = source_ip
    else:
        root = None

    # BFS hop distance from the root. Nodes outside the root's component are
    # simply absent from the result, so no exception handling is needed.
    distances = nx.single_source_shortest_path_length(G, root) if root is not None else {}

    # Group the remaining nodes by level. The root and the destinations are
    # left out so they do not take up slots in rows they are later moved from.
    destination_set = set(destination_nodes)
    levels = {}
    for node, distance in distances.items():
        if node != root and node not in destination_set:
            levels.setdefault(distance, []).append(node)

    # Anything the BFS did not reach still needs coordinates, otherwise the
    # networkx drawing functions fail on a node without a position.
    unreachable = [
        node for node in G.nodes()
        if node not in distances and node != root and node not in destination_set
    ]
    if unreachable:
        levels[max(levels, default=0) + 1] = unreachable

    # Position nodes level by level (level 0 is reserved for the root).
    pos = {}
    for level, nodes in levels.items():
        y_pos = level * y_spacing
        for x_pos, node in zip(centered_xs(len(nodes)), nodes):
            pos[node] = (x_pos, y_pos)

    if root is not None:
        pos[root] = (0, 0)  # Source at bottom center

    # All destinations share one row above everything placed so far.
    if destination_nodes:
        top_y = max((y for _, y in pos.values()), default=0) + y_spacing
        for x_pos, dest in zip(centered_xs(len(destination_nodes)), destination_nodes):
            pos[dest] = (x_pos, top_y)

    return pos

In [None]:
"""Load all results and create individual visualizations"""
# Read every stored result file; the analysis cells below need at least one,
# so stop here with an explicit error when none is available.
all_results = load_all_results()

if len(all_results) == 0:
	print("No results found to analyze!")
	raise ValueError("No results found")

In [None]:
# Per-source analysis: for every loaded result, drop empty hops, build the
# topology graph, print its metrics and save the visualization as a PDF.
# A source with no usable route is reported and skipped, so that one bad
# result file does not abort the analysis of the remaining sources.
skipped_sources = 0

for i, result in enumerate(all_results, 1):
	source_ip = result['source_ip']
	# Fall back to the notebook-level SOURCE_LOCATION when the result file
	# carries no location of its own.
	source_location = result.get('source_location', SOURCE_LOCATION)
	timestamp = result['timestamp']
	route_map = result['route_map']
	# Destinations without a status entry are treated as not reached by
	# build_individual_graph, so a missing table is equivalent to an empty one.
	route_status = result.get('statuses', {})

	print(f"\n{'='*60}")
	print(f"RESULT {i}/{len(all_results)}: Source IP {source_ip} ({timestamp})")
	print(f"{'='*60}")

	# Filter None hops from routes
	print("Filtering None hops from routes...")
	filtered_route_map = filter_hop_map(route_map)

	if not filtered_route_map:
		print("No valid routes after filtering!")
		skipped_sources += 1
		continue

	print(f"Valid routes after filtering: {len(filtered_route_map)}")
	for dest, hops in filtered_route_map.items():
		print(f"  {dest}: {len(hops)} valid hops")

	# Build individual graph
	print("Building graph...")
	G = build_individual_graph(filtered_route_map, source_ip, route_status, source_location=source_location)

	# Calculate metrics
	metrics = graph_descriptors(G)
	print(f"\nNetwork metrics for {source_ip}:")
	for k, v in metrics.items():
		if isinstance(v, float):
			print(f"  {k:18s}: {v:.4f}")
		else:
			print(f"  {k:18s}: {v}")

	# Node type analysis
	node_types = Counter(data.get('node_type', 'unknown') for node, data in G.nodes(data=True))

	print(f"\nNode type distribution:")
	for node_type, count in node_types.most_common():
		print(f"  {node_type:12s}: {count}")

	# Hop count analysis
	print(f"\nHop counts per destination:")
	timings = result.get('timings', {})
	for dest, hops in filtered_route_map.items():
		# A missing or null timing is printed as 0 instead of breaking the format.
		timing = timings.get(dest) or 0
		print(f"  {dest:35s}  hops: {len(hops):2d}  time: {timing:.3f}s")

	# Visualize individual graph
	print(f"\nVisualizing graph for {source_ip}...")
	visualize_individual_graph(G, source_ip, result, figsize=(20, 20),
		save_path=os.path.join(VISUALIZATIONS_FOLDER, f"graph_{source_ip}_{source_location}_{timestamp}.pdf"),
		is_show=False)

print(f"\n{'='*60}")
print(f"ANALYSIS COMPLETE - Processed {len(all_results) - skipped_sources} source(s)")
if skipped_sources:
	print(f"Skipped {skipped_sources} source(s) with no valid routes")
print(f"{'='*60}")