In [1]:
import networkx as nx
import pandas as pd
from tqdm import tqdm

In [2]:
# Names used to build the input paths below.
graph = "stexpanded"
community_algorithm = "bigclam"

# Edge list of the graph and the community file read for that graph.
graph_file = f"data/filtered_triples_weighted/{graph}.triples"
communities_file = f"./results/communities_{community_algorithm}/{graph}_10000.txt"

In [3]:
# Build an undirected, unweighted graph from the triples file. Fields are
# separated by "###" (a multi-character separator, hence the python parsing
# engine). Only the first two columns (the edge endpoints) are used; any
# further column is ignored.
graph_df = pd.read_csv(graph_file, sep="###", engine="python", header=None)
G = nx.Graph()
G.add_edges_from((int(u), int(v)) for u, v in zip(graph_df[0], graph_df[1]))

# One community per line: whitespace-separated integer node ids.
# str.split() with no argument tolerates stray whitespace and produces no
# tokens for a blank line, so blank lines are skipped instead of raising
# ValueError from int("").
communities = []
with open(communities_file) as cf:
    for line in cf:
        members = {int(token) for token in line.split()}
        if members:
            communities.append(members)

# One row per graph node, in ascending id order; the centrality columns are
# attached to this frame in later cells.
community_vector = pd.DataFrame({'id': sorted(G.nodes)})

In [4]:
# print(community_vector)

In [5]:
def overlapping_community_degree_centrality(G, communities):
    """Average each node's degree centrality over the communities containing it.

    For every community, the subgraph of ``G`` induced by its members is taken
    and ``nx.degree_centrality`` is evaluated on that subgraph. A node that
    occurs in several communities gets the arithmetic mean of its
    per-community scores.

    Args:
        G: Graph providing ``subgraph``.
        communities: Iterable of node collections, one per community.

    Returns:
        dict mapping node -> mean degree centrality. Only nodes that appear in
        at least one community subgraph are present.
    """
    per_node_scores = {}
    for members in communities:
        induced = G.subgraph(members)
        for node, score in nx.degree_centrality(induced).items():
            if node in per_node_scores:
                per_node_scores[node].append(score)
            else:
                per_node_scores[node] = [score]

    averaged = {}
    for node, scores in per_node_scores.items():
        averaged[node] = sum(scores) / len(scores)
    return averaged

In [6]:
# Mean per-community degree centrality, keyed by node id.
# NOTE(review): the name `centrality_dict` is reassigned for each centrality
# measure further down, so these cells only give correct columns when run in order.
centrality_dict = overlapping_community_degree_centrality(G, communities)

In [7]:
# Attach the scores by node id; ids missing from `centrality_dict` (nodes that
# are in no community) are left as NaN by Series.map.
community_vector['degree_centrality'] = community_vector['id'].map(centrality_dict)

In [8]:
def overlapping_community_betweenness_centrality(G, communities):
    """Average each node's betweenness centrality over the communities containing it.

    For every community, ``nx.betweenness_centrality`` (default arguments) is
    evaluated on the subgraph of ``G`` induced by the community's members. A
    node that occurs in several communities gets the arithmetic mean of its
    per-community scores. Progress over the communities is shown with tqdm.

    Args:
        G: Graph providing ``subgraph``.
        communities: Iterable of node collections, one per community.

    Returns:
        dict mapping node -> mean betweenness centrality. Only nodes that
        appear in at least one community subgraph are present.
    """
    collected = {}
    for members in tqdm(communities):
        community_scores = nx.betweenness_centrality(G.subgraph(members))
        for node, score in community_scores.items():
            bucket = collected.get(node)
            if bucket is None:
                bucket = []
                collected[node] = bucket
            bucket.append(score)

    means = {}
    for node, scores in collected.items():
        means[node] = sum(scores) / len(scores)
    return means

In [9]:
# Mean per-community betweenness centrality, keyed by node id.
# NOTE(review): this overwrites the `centrality_dict` produced for the previous
# measure; the column assignment in the next cell must run before the next overwrite.
centrality_dict = overlapping_community_betweenness_centrality(G, communities)

100%|██████████| 9635/9635 [03:25<00:00, 46.99it/s] 


In [10]:
# Attach the scores by node id; ids missing from `centrality_dict` (nodes that
# are in no community) are left as NaN by Series.map.
community_vector['betweenness_centrality'] = community_vector['id'].map(centrality_dict)

In [11]:
def overlapping_community_closeness_centrality(G, communities):
    """Average each node's closeness centrality over the communities containing it.

    For every community, ``nx.closeness_centrality`` (default arguments) is
    evaluated on the subgraph of ``G`` induced by the community's members. A
    node that occurs in several communities gets the arithmetic mean of its
    per-community scores. Progress over the communities is shown with tqdm.

    Args:
        G: Graph providing ``subgraph``.
        communities: Iterable of node collections, one per community.

    Returns:
        dict mapping node -> mean closeness centrality. Only nodes that appear
        in at least one community subgraph are present.
    """
    scores_by_node = {}
    for members in tqdm(communities):
        community_view = G.subgraph(members)
        closeness = nx.closeness_centrality(community_view)
        for node in closeness:
            if node not in scores_by_node:
                scores_by_node[node] = []
            scores_by_node[node].append(closeness[node])

    return {
        node: sum(values) / len(values)
        for node, values in scores_by_node.items()
    }

In [12]:
# Mean per-community closeness centrality, keyed by node id.
# NOTE(review): this overwrites the `centrality_dict` produced for the previous
# measure; run the column assignment in the next cell right after this one.
centrality_dict = overlapping_community_closeness_centrality(G, communities)

100%|██████████| 9635/9635 [01:42<00:00, 94.22it/s] 


In [13]:
# Attach the scores by node id; ids missing from `centrality_dict` (nodes that
# are in no community) are left as NaN by Series.map.
community_vector['closeness_centrality'] = community_vector['id'].map(centrality_dict)

In [15]:
# Write the per-node table as a semicolon-separated CSV, without the
# DataFrame index column.
community_vector.to_csv('results/community_vector.csv', sep=';', index=False)