In [None]:
import networkx as nx
import pandas as pd
import community
import numpy as np

In [None]:
def load_graph(filename):
    """Read a headerless three-column edge CSV into an undirected graph.

    Parameters
    ----------
    filename : str or path-like
        Path to the CSV. It is read with ``header=None`` and its columns are
        named ``source``, ``target`` and ``weight`` here.

    Returns
    -------
    networkx.Graph
        Graph produced by ``nx.from_pandas_edgelist`` with ``weight`` attached
        to every edge as an attribute.
    """
    column_names = ['source', 'target', 'weight']
    edge_table = pd.read_csv(filename, header=None, names=column_names)
    # No source/target arguments are passed, so the edge-list builder relies
    # on its default endpoint column names, which match the ones set above.
    return nx.from_pandas_edgelist(
        edge_table,
        edge_attr='weight',
        create_using=nx.Graph(),
    )

In [None]:
def add_inference_edges(G, edge_list):
    """Add the weighted edges listed in a headerless CSV to ``G``.

    Parameters
    ----------
    G : graph object
        Any object exposing ``add_weighted_edges_from`` (e.g. a networkx
        graph). It is mutated in place.
    edge_list : str or path-like
        Path to a CSV without a header row whose three columns are, in order,
        source node, destination node and edge weight.

    Returns
    -------
    The same ``G`` object that was passed in, after the edges were added.

    Raises
    ------
    ValueError
        If the file does not have exactly three columns (raised by the column
        assignment below).
    """
    edges = pd.read_csv(edge_list, header=None)
    # Assigning exactly three names doubles as a shape check on the input.
    edges.columns = ['src', 'dst', 'weight']
    # One pass over the frame yielding plain (src, dst, weight) tuples of
    # native Python values, instead of three scalar iloc lookups per row.
    G.add_weighted_edges_from(edges.itertuples(index=False, name=None))
    return G

In [None]:
def write_out_results(reverse_comms, filename, node_names=None):
    """Append every community and the names of its members to ``results/<filename>``.

    Parameters
    ----------
    reverse_comms : dict
        Maps a community id (formatted with ``%d``) to an iterable of node keys.
    filename : str
        File name, relative to the ``results/`` directory.
    node_names : mapping, optional
        Maps each node key to the string written for it. When omitted, the
        notebook-level ``nodes_mapping`` is used, so existing two-argument
        calls behave as before.

    Raises
    ------
    NameError
        If ``node_names`` is omitted and ``nodes_mapping`` has not been defined yet.
    KeyError
        If a node has no entry in the mapping.
    """
    import os  # local import: the notebook's import cell does not provide os

    if node_names is None:
        # Looked up at call time; the cell defining it must have run first.
        node_names = nodes_mapping

    out_path = 'results/' + filename
    # Create the target directory if needed instead of failing on first use.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    # Append mode is kept: re-running adds to the file rather than replacing it.
    # The with-block guarantees the handle is flushed and closed.
    with open(out_path, 'a') as f:
        for cmty, nodes in reverse_comms.items():
            f.write("Community %d:\n" % cmty)
            for node in nodes:
                f.write(node_names[node] + "\n")
            f.write("\n")

In [None]:
# Build the base graph from the weighted edge list on disk.
G = load_graph('data/reddit_nodes_weighted_full.csv')
print('OG edges:', G.number_of_edges())
# add_inference_edges mutates G in place and returns it. The figure printed
# below is the total edge count after the merge, not the number of edges added.
G = add_inference_edges(G, 'new_edges/linreg_node2vec-256_edges_to_add.csv')
print('New edges: ', G.number_of_edges())

In [None]:
# run Louvain
# Node visiting order is randomised, so the partition differs between runs
# unless seeded. `random_state` pins it for Restart & Run All and replaces the
# deprecated `randomize` flag (the two cannot be combined).
LOUVAIN_SEED = 42
partition = community.best_partition(G, random_state=LOUVAIN_SEED)

In [None]:
# Score the partition computed earlier against the graph it was derived from.
modularity = community.modularity(partition, G)
print("modularity:", modularity)

In [None]:
# Tally how many nodes were assigned to each community id.
communities = {}
for v in partition.values():
    if v in communities:
        communities[v] += 1
    else:
        communities[v] = 1
print("Louvian detected %d communities" % len(communities))

In [None]:
# Invert the node -> community mapping into community -> [nodes].
comms = dict(partition)
reverse_comms = {}
for key, val in comms.items():
    # Append in place; concatenating a fresh list per node would copy the
    # community's whole member list every time.
    reverse_comms.setdefault(val, []).append(key)
print(len(reverse_comms))

In [None]:
# Load nodes mapping
# NOTE(security): allow_pickle=True unpickles whatever the file contains, which
# can execute arbitrary code -- only load .npy files from a trusted source.
# .item() unwraps the single object stored in the array; presumably a dict from
# node key to display name (write_out_results indexes it by node and
# concatenates the value with a string) -- confirm against the file's producer.
nodes_mapping = np.load('data/reverse_nodes_mapping.npy', allow_pickle=True).item()

In [None]:
# Write the community listing under results/. The file is opened in append
# mode, so re-running this cell adds a second copy of the listing.
# NOTE(review): the output name mentions "rolx" while the inference edges loaded
# earlier come from a node2vec file -- confirm the name is intended.
write_out_results(reverse_comms, filename='louvain_dense_rolx.txt')