In [8]:
import json
import math
import random

# Input/output locations and the default share of eligible flights to draw.
FULL_GRAPH_PATH = 'visualization/graph_data_full.json'
VIZ_GRAPH_PATH = 'visualization/graph_data_viz.json'
FLIGHT_KEEP_PROBABILITY = 0.1


def parse_graph_json(text):
    """Parse graph JSON text, mapping NaN/Infinity/-Infinity tokens to None.

    The non-standard constants are replaced at the parser level rather than
    by text substitution, so they are caught wherever they appear (after any
    spacing, inside arrays) and string values that merely contain the text
    "NaN" are left untouched.
    """
    return json.loads(text, parse_constant=lambda _name: None)


def clean_coordinate(value):
    """Return *value* as a finite float; missing or non-finite becomes 0.0.

    Raises whatever ``float()`` raises if *value* is not numeric.
    """
    if value is None:
        return 0.0
    number = float(value)
    return number if math.isfinite(number) else 0.0


def build_viz_graph(nodes, edges,
                    flight_keep_probability=FLIGHT_KEEP_PROBABILITY,
                    rng=None):
    """Build the reduced graph written to the visualization file.

    Args:
        nodes: list of node dicts; each has an 'id' and may have 'type',
            'lat' and 'lon'.
        edges: list of edge dicts; each has 'source' and 'target' node ids
            and may have 'type'.
        flight_keep_probability: chance that a flight edge between two
            active nodes is kept.
        rng: object providing ``shuffle`` and ``random`` (for example a
            seeded ``random.Random``); defaults to the ``random`` module.

    Returns:
        ``{"nodes": [...], "links": [...]}``. Links are every edge touching
        a lineage node followed by the sampled flight edges. Nodes are the
        endpoints of the lineage edges, in input order, as shallow copies
        with 'lat'/'lon' cleaned. The input dicts are not modified.
    """
    rng = random if rng is None else rng

    # Later duplicates of an id override earlier ones; dict order keeps the
    # output node order stable (first occurrence in the input).
    nodes_by_id = {node['id']: node for node in nodes}
    lineage_ids = {
        node_id for node_id, node in nodes_by_id.items()
        if node.get('type') == 'lineage'
    }

    kept_edges = []
    active_ids = set()
    flight_candidates = []
    for edge in edges:
        src, dst = edge['source'], edge['target']
        # An edge to an id with no node record cannot be drawn; skipping it
        # keeps the output free of dangling links.
        if src not in nodes_by_id or dst not in nodes_by_id:
            continue
        if src in lineage_ids or dst in lineage_ids:
            # A. Always keep lineage connections (sampling / evolution).
            kept_edges.append(edge)
            active_ids.update((src, dst))
        elif edge.get('type') == 'flight':
            # Only flights NOT already kept above are candidates, so no edge
            # can end up in the output twice.
            flight_candidates.append(edge)

    # B. Sample flights between already-active nodes to show density.
    # The shuffle only randomizes the order of the kept flight links.
    rng.shuffle(flight_candidates)
    for edge in flight_candidates:
        if (edge['source'] in active_ids and edge['target'] in active_ids
                and rng.random() < flight_keep_probability):
            kept_edges.append(edge)

    # TODO: if the graph is too small (fewer than ~100 active nodes), expand
    # it with high-degree airports and their flights.

    # Keep only active nodes and normalize their coordinates on a copy.
    viz_nodes = []
    for node_id, node in nodes_by_id.items():
        if node_id not in active_ids:
            continue
        cleaned = dict(node)
        for key in ('lat', 'lon'):
            if key in cleaned:
                cleaned[key] = clean_coordinate(cleaned[key])
        viz_nodes.append(cleaned)

    return {"nodes": viz_nodes, "links": kept_edges}


# True both when run as a script and inside a notebook cell; keeps the file
# I/O from running if these helpers are imported elsewhere.
if __name__ == "__main__":
    # Load Data
    with open(FULL_GRAPH_PATH, 'r', encoding='utf-8') as f:
        full_data = parse_graph_json(f.read())

    nodes_key = 'nodes'
    edges_key = 'edges' if 'edges' in full_data else 'links'

    viz_data = build_viz_graph(full_data[nodes_key], full_data[edges_key])
    viz_nodes, viz_edges = viz_data["nodes"], viz_data["links"]

    with open(VIZ_GRAPH_PATH, 'w', encoding='utf-8') as f:
        json.dump(viz_data, f)

    print(f"Saved! Nodes: {len(viz_data['nodes'])} (All Connected), Links: {len(viz_data['links'])}")

Saved! Nodes: 576 (All Connected), Links: 9976
