In [1]:
# load gexf files
import networkx as nx

# File names of the two GEXF snapshots (bare names, so they are resolved
# against the notebook's working directory).
CURRENT_GEXF_PATH = "EP_final_without_DNV.gexf"
PREVIOUS_GEXF_PATH = "EP_previous_final_without_DNV.gexf"

# Parse each snapshot into its own NetworkX graph.
G_now = nx.read_gexf(CURRENT_GEXF_PATH)
G_past = nx.read_gexf(PREVIOUS_GEXF_PATH)


In [5]:
# Report the node and edge counts of both snapshots.
print(f"Current network: {G_now.number_of_nodes()} nodes, {G_now.number_of_edges()} edges")
print(f"Past network: {G_past.number_of_nodes()} nodes, {G_past.number_of_edges()} edges")

# Preview five node ids from the current snapshot and five edges from the past one.
# NOTE(review): nodes come from G_now but edges from G_past — confirm the
# mixed sampling is intentional.
first_nodes_now = list(G_now)[:5]
first_edges_past = list(G_past.edges)[:5]
print(first_nodes_now)
print(first_edges_past)

Current network: 712 nodes, 125239 edges
Past network: 700 nodes, 98590 edges
['169362:ABSTENTION', '169401:ABSTENTION', '169418:ABSTENTION', '169418:FOR', '169419:ABSTENTION']
[('127022:FOR', '127023:FOR'), ('127022:FOR', '127024:FOR'), ('127022:FOR', '127025:FOR'), ('127022:FOR', '127026:FOR'), ('127022:FOR', '127027:FOR')]


In [10]:
# Analyze clustering in the two networks using the community detection algorithm
from networkx.algorithms.community import greedy_modularity_communities

# Partition each snapshot with greedy modularity maximisation.
# NOTE(review): no `weight` argument is passed, so the library default applies —
# if the GEXF edges carry weights, confirm whether they should be used here.
communities_now = [*greedy_modularity_communities(G_now)]
communities_past = [*greedy_modularity_communities(G_past)]

# Community sizes, largest first.
sizes_now = sorted(map(len, communities_now), reverse=True)
sizes_past = sorted(map(len, communities_past), reverse=True)

print(f"Number of communities in current network: {len(communities_now)}")
print(f"Number of communities in past network: {len(communities_past)}")
print("Community sizes in current network:", sizes_now)
print("Community sizes in past network:", sizes_past)

Number of communities in current network: 2
Number of communities in past network: 2
Community sizes in current network: [409, 303]
Community sizes in past network: [412, 288]


In [11]:
# Average clustering coefficient of each snapshot, computed in the same
# order as before (current first, then past).
avg_clustering_now, avg_clustering_past = (
    nx.average_clustering(graph) for graph in (G_now, G_past)
)

print(f"Average clustering coefficient (current): {avg_clustering_now:.4f}")
print(f"Average clustering coefficient (past): {avg_clustering_past:.4f}")

# Compare modularity scores using the found communities
def modularity(G, communities):
    """Return the modularity of ``communities`` as a partition of ``G``.

    Thin convenience wrapper: both arguments are forwarded unchanged to
    ``nx.algorithms.community.modularity`` and its result is returned as is.
    """
    score_partition = nx.algorithms.community.modularity
    return score_partition(G, communities)

# Score each snapshot's detected partition (current first, then past).
modularity_now, modularity_past = (
    modularity(graph, partition)
    for graph, partition in ((G_now, communities_now), (G_past, communities_past))
)

print(f"Modularity (current network): {modularity_now:.4f}")
print(f"Modularity (past network): {modularity_past:.4f}")

Average clustering coefficient (current): 0.7172
Average clustering coefficient (past): 0.7000
Modularity (current network): 0.4134
Modularity (past network): 0.4332


In [12]:
# Betweenness centrality of every node in each snapshot. The keyword arguments
# only spell out the library defaults (exact computation over all nodes,
# normalised values, unweighted shortest paths).
# NOTE(review): neither result is used in the cells visible here — confirm a
# later cell consumes them, since this is the most expensive step so far.
betweenness_now = nx.betweenness_centrality(G_now, k=None, normalized=True, weight=None)
betweenness_past = nx.betweenness_centrality(G_past, k=None, normalized=True, weight=None)

# Compare silhouette scores
import numpy as np
from sklearn.metrics import silhouette_score

def calculate_silhouette_score_from_graph(G, communities):
    """Compute the silhouette score of a community partition of ``G``.

    Every node is represented by its row of the adjacency matrix returned by
    ``nx.to_numpy_array`` (called with its default edge-weight handling), and
    rows are compared with cosine distance.

    Parameters
    ----------
    G : networkx graph
        Graph whose nodes are scored. A directed graph is converted with
        ``to_undirected()`` first; the caller's graph is not modified.
    communities : iterable of iterables of nodes
        The partition to score. The position of a community in the iterable
        becomes its integer cluster label. If a node appears in several
        communities, the last one wins.

    Returns
    -------
    float
        The value returned by ``sklearn.metrics.silhouette_score``.

    Raises
    ------
    ValueError
        If any node of ``G`` is missing from ``communities``, or if fewer
        than two distinct communities are present.
    KeyError
        If a community contains a node that is not in ``G``.
    """
    if G.is_directed():
        G = G.to_undirected()

    nodes = list(G.nodes())
    node_to_index = {node: i for i, node in enumerate(nodes)}
    X = nx.to_numpy_array(G, nodelist=nodes, dtype=float)

    # Start from a sentinel instead of np.empty: uninitialised memory would
    # give nodes missing from `communities` arbitrary labels and silently
    # corrupt the score.
    unassigned_label = -1
    labels = np.full(len(nodes), unassigned_label, dtype=int)
    for community_index, community in enumerate(communities):
        for node in community:
            labels[node_to_index[node]] = community_index

    unassigned = [node for node, label in zip(nodes, labels) if label == unassigned_label]
    if unassigned:
        raise ValueError(
            f"{len(unassigned)} node(s) are not assigned to any community, "
            f"e.g. {unassigned[:5]!r}."
        )

    if len(np.unique(labels)) < 2:
        raise ValueError("Silhouette score requires at least 2 clusters.")
    return silhouette_score(X, labels, metric="cosine")

# Silhouette score of each snapshot's detected partition (current first, then past).
silhouette_now, silhouette_past = (
    calculate_silhouette_score_from_graph(graph, partition)
    for graph, partition in ((G_now, communities_now), (G_past, communities_past))
)

print(f"Silhouette score (current network): {silhouette_now:.4f}")
print(f"Silhouette score (past network): {silhouette_past:.4f}")

Silhouette score (current network): 0.5570
Silhouette score (past network): 0.5523
