In [1]:
import networkx as nx

In [2]:
filename = 'data/graph_collapse.txt'

In [3]:
G = nx.Graph()

with open(filename, 'r') as file:
    for line in file:
        node1, node2 = line.strip().split()
        G.add_edge(node1, node2)

components = list(nx.connected_components(G))
components.sort(key=len, reverse=True)
largest_component = components[0]
G_largest = G.subgraph(largest_component).copy()
other_components = components[1:]
G_others = nx.Graph()
for component in other_components:
    G_others = nx.compose(G_others, G.subgraph(component))

In [4]:
def characteristics(g):
    num_vertices = g.number_of_nodes()
    
    num_edges = g.number_of_edges()
    
    if nx.is_connected(g):
        diameter = nx.diameter(g)
    else:
        components = list(nx.connected_components(G))
        avg_diameter = 0
        num_components = len(components)
        for component in components:
            subgraph = G.subgraph(component)
            try:
                avg_diameter += nx.diameter(subgraph)
            except nx.NetworkXError:
                continue
        if num_components > 0:
            avg_diameter /= num_components
        diameter = avg_diameter
    density = nx.density(g)
    print(f"- Number of vertices: {num_vertices}")
    print(f"- Number of edges: {num_edges}")
    print(f"- Diameter: {diameter}")
    print(f"- Density: {density}")

In [5]:
print('Full Graph:')
characteristics(G)
print('Largest Component:')
characteristics(G_largest)
print('Small Components:')
characteristics(G_others)

Full Graph:
- Number of vertices: 5101
- Number of edges: 5222
- Diameter: 2.142105263157895
- Density: 0.00040145915256908487
Largest Component:
- Number of vertices: 2125
- Number of edges: 2871
- Diameter: 34
- Density: 0.001272183449651047
Small Components:
- Number of vertices: 2976
- Number of edges: 2351
- Diameter: 2.142105263157895
- Density: 0.0005310834011023765
