### Create connected component graphs for visualization purposes

In [None]:
from collections import defaultdict
import pickle

import networkx as nx
from pyvis.network import Network

import graph_utils as gu
import settings

Load the graph data from the Python pickle files produced by the preprocessing notebook.

In [19]:
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load files that were written by this project's own preprocessing step.
# The context managers make sure both file handles are closed right after loading.
with open('resources/create_graph/full_graph.pickle', 'rb') as graph_file:
    G = pickle.load(graph_file)
with open('resources/create_graph/edge_map.pickle', 'rb') as edge_map_file:
    edge_map = pickle.load(edge_map_file)

In [20]:
# Depth-2 graph, used only for visualization: merge the edges (with their
# attributes) of the radius-2 ego graph around every node of G.
# Only edges are copied, so a node without any edge does not appear in d2_graph.
# NOTE(review): each edge of G should already be part of the ego graph of its
# own endpoints, so this union presumably equals G's edge set - confirm whether
# the radius-2 expansion is really needed.
d2_graph = nx.Graph()
for center in G:
    d2_graph.add_edges_from(nx.ego_graph(G, center, radius=2).edges(data=True))
d2_graph = nx.Graph(d2_graph)


# Split the graph into its connected components (one set of nodes each)
components = [*nx.connected_components(d2_graph)]


In [21]:
def create_html(component: set, index: int) -> None:
    """
    Create an HTML visualization of a single connected component.

    The component is taken as a subgraph of the module-level ``d2_graph``.
    Every node is added to a pyvis network; every edge that has an entry in
    the module-level ``edge_map`` is added with its weight, its languages and
    a width normalized against the largest weight found in this component.
    Edges without an ``edge_map`` entry are skipped.

    The result is written to
    ``resources/graph_visualization/component_graphs/component_<index + 1>_graph.html``
    and afterwards passed to ``gu.add_dynamic_legend_to_component_graph``.

    :param component: Set of nodes that make up the component subgraph.
    :param index: Zero-based component index; ``index + 1`` is used in the
        name of the output file.
    """
    component_graph = d2_graph.subgraph(component)

    net = Network(notebook=False, height="100vh", bgcolor="#222222", font_color="white")

    for node in component_graph.nodes:
        # The full node name is the tooltip, the part before the separator the label.
        net.add_node(node, title=node, label=node.split(settings.SEPERATOR)[0])

    # Resolve every graph edge to its edge_map entry exactly once. edge_map is
    # looked up with the *sorted* node pair; the graph may yield (u, v) in either
    # order, so the raw tuple must not be used as the key. Using the same key for
    # the maximum and for the drawing loop keeps both in agreement.
    known_edges = []
    for u, v in component_graph.edges:
        pair = tuple(sorted([u, v]))
        if pair in edge_map:
            known_edges.append((u, v, edge_map[pair]))

    # Local maximum of the edge weights; falls back to 1 when the component has
    # no edge that is present in edge_map.
    component_maximum = max((edge.weight for _, _, edge in known_edges), default=1)

    # Add edges with weights and language information
    for u, v, edge in known_edges:
        net.add_edge(
            u,
            v,
            value=edge.weight,
            languages=', '.join(edge.value),
            width=edge.normalized(component_maximum),
        )

    # Save graph in a html file
    output_file = f"resources/graph_visualization/component_graphs/component_{index + 1}_graph.html"
    net.save_graph(output_file)

    # Add dynamic language legend
    gu.add_dynamic_legend_to_component_graph(output_file)

Create an HTML graph visualization for every component of the depth-2 subgraph,
as well as a short summary of paths and random walks for each component.

In [24]:
# Connected components of the depth-2 graph, each given as a set of nodes.
components = [*nx.connected_components(d2_graph)]

for index, component in enumerate(components):
    # 1) Interactive HTML visualization of this component.
    create_html(component=component, index=index)

    component_graph = d2_graph.subgraph(component)

    # 2) Approximate longest path; its result is also used as the walk length.
    longest_path = gu.find_longest_path_approx(component_graph)
    walk_length = longest_path

    # 3) Random walk starting at the first node of the component.
    # NOTE(review): the walk result is not written to the summary below -
    # confirm whether it should be included.
    start_node = next(iter(component_graph.nodes))
    random_walk = gu.random_walk_in_component(component_graph, start_node, walk_length)

    # 4) Short per-component summary, stored next to the HTML file.
    summary = dict(
        component_id=index + 1,
        num_nodes=component_graph.number_of_nodes(),
        num_edges=component_graph.number_of_edges(),
        longest_path=walk_length,
    )
    summary_file = f"resources/graph_visualization/component_graphs/component_{index + 1}_summary.json"
    gu.save_results_to_file(summary, summary_file)

    #print(f"Component {index + 1} processed and saved.")

