# Construct the graph

In [1]:
import igraph
import pandas as pd
# Load the raw publication records from a CSV in the working directory.
# Later cells read the 'SubjectID' and 'edge_ID' columns of this frame.
raw_source = pd.read_csv('deidentified_pub_record.csv')

In [2]:
# Record the igraph version this notebook was run with
print(igraph.__version__)

0.8.2


In [3]:
from itertools import combinations

# Node list: one vertex per distinct SubjectID, named by its string form
node_list = [str(subject_id) for subject_id in raw_source['SubjectID'].unique()]

# Group the raw records by edge_ID: one row per edge_ID holding the list of
# SubjectIDs that appear under it
edge_df = raw_source.groupby('edge_ID')['SubjectID'].apply(list).reset_index(name='author_list')

# Generate edge_list from the bag of authors per article.
# Every entry has the shape [author_a, author_b, edge_ID].
edge_list = []
# Iterate the two columns directly; building a Series per row with
# iterrows() is unnecessary here.
for doi, author_list in zip(edge_df['edge_ID'], edge_df['author_list']):
    if len(author_list) == 1:
        # Single-author article: add a self-loop
        sole_author = author_list[0]
        edge_list.append([sole_author, sole_author, doi])
    else:
        # Multi-author article: one edge per pair of authors, in the order
        # produced by combinations()
        edge_list.extend(
            [first, second, doi] for first, second in combinations(author_list, 2)
        )

In [4]:
# Split every edge record into its endpoints and its label.
# Endpoints are stringified so they line up with the string vertex names.
edge_tuple_list = [(str(source), str(target)) for source, target, *_ in edge_list]
# The label is the last element of each edge record
edge_label_list = [label for *_, label in edge_list]

In [5]:
# Compose a graph from the edge-list; the DOI edge attribute is attached separately
g = igraph.Graph()
# Vertices are added first from the list of string IDs, then the edges, whose
# endpoints are given as pairs of those same strings.
# NOTE(review): presumably add_edges resolves string endpoints through the
# vertex 'name' attribute — confirm against the igraph documentation.
g.add_vertices(node_list)
g.add_edges(edge_tuple_list)

In [6]:
# Add the edge-id: one label per edge, taken from edge_label_list
g.es['edgeid'] = edge_label_list
# Display the graph summary as the cell output
g.summary()

'IGRAPH UN-- 4321 7285 -- \n+ attr: name (v), edgeid (e)'

# Attempt to get a quotient graph for the giant component
Note that the partition returned by `community_leiden` is randomized, so it can differ between runs unless the random number generator is seeded.

In [14]:
# igraph has its own way of seeding: Python's `random` module is handed to
# igraph as its random number generator, and that module is then seeded.
import random
import igraph as ig
ig.set_random_number_generator(random)
random.seed("2020-07-01") # Any other fixed seed value would do; this one is a date string

In [31]:
# Order the connected components from largest to smallest
components = list(g.components())
components.sort(key=len, reverse=True)
# The largest one comes first: this is the giant component
igiant_component = components[0]

In [32]:
# Extract the giant component as a graph of its own
i_giant_graph = g.subgraph(igiant_component, implementation='copy_and_delete')
# Display the subgraph summary as the cell output
i_giant_graph.summary()

'IGRAPH UN-- 2004 4533 -- \n+ attr: name (v), edgeid (e)'

In [33]:
# Re-seed immediately before the stochastic step so this run starts from a known RNG state
ig.set_random_number_generator(random)
random.seed("2020-07-01")  # any other fixed seed value would do
# Run 1: Leiden communities on the giant component, optimising modularity
part1 = g.subgraph(igiant_component).community_leiden(
    objective_function='modularity',
    n_iterations=10,
)
# Build the quotient graph from that membership;
# combine_edges=list is helpful for collecting the DOIs of the merged edges
quotient_graph = igraph.VertexClustering(
    i_giant_graph, membership=part1.membership
).cluster_graph(combine_edges=list)
quotient_graph.summary()

'IGRAPH U--- 43 132 -- \n+ attr: edgeid (e)'

In [34]:
# Re-seed immediately before the stochastic step so this run starts from a known RNG state
ig.set_random_number_generator(random)
random.seed("2020-07-01")  # any other fixed seed value would do
# Run 2: Leiden communities on the giant component, optimising modularity
part2 = g.subgraph(igiant_component).community_leiden(
    objective_function='modularity',
    n_iterations=10,
)
# Build the quotient graph from that membership;
# combine_edges=list is helpful for collecting the DOIs of the merged edges
quotient_graph = igraph.VertexClustering(
    i_giant_graph, membership=part2.membership
).cluster_graph(combine_edges=list)
quotient_graph.summary()

'IGRAPH U--- 43 132 -- \n+ attr: edgeid (e)'

In [35]:
# Re-seed immediately before the stochastic step so this run starts from a known RNG state
ig.set_random_number_generator(random)
random.seed("2020-07-01")  # any other fixed seed value would do
# Run 3: Leiden communities on the giant component, optimising modularity
part3 = g.subgraph(igiant_component).community_leiden(
    objective_function='modularity',
    n_iterations=10,
)
# Build the quotient graph from that membership;
# combine_edges=list is helpful for collecting the DOIs of the merged edges
quotient_graph = igraph.VertexClustering(
    i_giant_graph, membership=part3.membership
).cluster_graph(combine_edges=list)
quotient_graph.summary()

'IGRAPH U--- 43 132 -- \n+ attr: edgeid (e)'

In [36]:
# Reproducibility check: do runs 1 and 2 yield the same list of communities?
list(part1) == list(part2)

True

In [37]:
# Reproducibility check: do runs 2 and 3 yield the same list of communities?
list(part2) == list(part3)

True

In [38]:
# Reproducibility check: do runs 1 and 3 yield the same list of communities?
list(part1) == list(part3)

True