# Construct the graph

In [1]:
import igraph
import pandas as pd
# Load the de-identified publication records from a CSV in the working directory.
# The cells below read its 'SubjectID' and 'edge_ID' columns.
raw_source = pd.read_csv('deidentified_pub_record.csv')

In [2]:
# Print the installed igraph version so the results can be tied to a library release.
print(igraph.__version__)

0.8.2


In [3]:
from itertools import combinations

# Node list: one entry per distinct SubjectID, stored as a string so the
# names match the stringified edge endpoints used when the graph is built.
node_list = [str(SubjectID) for SubjectID in raw_source['SubjectID'].unique()]

# Group the raw records by edge_ID: one row per edge_ID, holding the list of
# SubjectIDs ("bag of authors") that share it.
edge_df = (
    raw_source
    .groupby('edge_ID')['SubjectID']
    .apply(list)
    .reset_index(name='author_list')
)

# Generate edge_list as [source, target, edge_ID] triples from the bag of
# authors per article.
edge_list = []
# Walk the two columns in lockstep; iterrows() would build a full Series for
# every row only to read these same two values back out.
for author_list, doi in zip(edge_df['author_list'], edge_df['edge_ID']):
    if len(author_list) == 1:
        # Single-author record: add a self-loop on that author.
        edge_list.append([author_list[0], author_list[0], doi])
    else:
        # Every unordered pair of co-authors becomes one edge.
        edge_list.extend(
            [first, second, doi]
            for first, second in combinations(author_list, 2)
        )

In [4]:
# Split each [source, target, edge_ID] triple into two parallel lists:
# the endpoint pairs (as strings) and the matching edge_ID labels.
edge_tuple_list = [(str(source), str(target)) for source, target, _ in edge_list]
edge_label_list = [label for *_, label in edge_list]

In [5]:
# Compose a graph from the edge-list (the DOI edge attribute is attached in a later step).
g = igraph.Graph()
# Vertices are created from the stringified SubjectIDs; the summary printed
# after construction lists them under the vertex attribute `name`.
g.add_vertices(node_list)
# Edge endpoints are given as those same string names, not as integer indices.
g.add_edges(edge_tuple_list)

In [6]:
# Add the edge id: label every edge with the edge_ID of the record it came from.
# edge_label_list and edge_tuple_list are built from edge_list in the same order,
# so the i-th label belongs to the i-th edge added to g.
g.es['edgeid'] = edge_label_list
g.summary()

'IGRAPH UN-- 4321 7285 -- \n+ attr: name (v), edgeid (e)'

# Attempt to get a quotient graph for the giant component
Note, however, that the partition returned by `community_leiden` is random across runs, so the resulting quotient graph can differ from one run to the next.

In [7]:
# Connected components of g, ordered from largest to smallest.
components = list(g.components())
components.sort(key=len, reverse=True)
# The first entry is the giant (largest) component; it is passed to
# g.subgraph() below to select that component's vertices.
igiant_component = components[0]

In [8]:
# Extract the giant component as a standalone graph. The summary shows that
# the vertex names and the edge ids are carried over to the subgraph.
i_giant_graph = g.subgraph(igiant_component, implementation='copy_and_delete')
i_giant_graph.summary()

'IGRAPH UN-- 2004 4533 -- \n+ attr: name (v), edgeid (e)'

In [9]:
# Detect communities in the giant component with the Leiden algorithm
# (modularity objective). The algorithm is run on i_giant_graph itself rather
# than on a freshly extracted g.subgraph(...) copy, so the membership vector is
# guaranteed to refer to the vertices of the graph that gets collapsed below.
# No random seed is set on purpose: the partition may differ between runs.
part1 = i_giant_graph.community_leiden(objective_function='modularity', n_iterations=10)
# Collapse every community into a single vertex to obtain the quotient graph.
quotient_graph = part1.cluster_graph(combine_edges=list)  # list here is helpful for collecting the DOIs
quotient_graph.summary()

'IGRAPH U--- 43 133 -- \n+ attr: edgeid (e)'

In [10]:
# Second, independent Leiden run on the giant component (modularity objective).
# The algorithm is run on i_giant_graph itself rather than on a freshly
# extracted g.subgraph(...) copy, so the membership vector is guaranteed to
# refer to the vertices of the graph that gets collapsed below.
# No random seed is set on purpose: the partition may differ between runs.
part2 = i_giant_graph.community_leiden(objective_function='modularity', n_iterations=10)
# Collapse every community into a single vertex to obtain the quotient graph.
quotient_graph = part2.cluster_graph(combine_edges=list)  # list here is helpful for collecting the DOIs
quotient_graph.summary()

'IGRAPH U--- 43 131 -- \n+ attr: edgeid (e)'

In [11]:
# Third, independent Leiden run on the giant component (modularity objective).
# The algorithm is run on i_giant_graph itself rather than on a freshly
# extracted g.subgraph(...) copy, so the membership vector is guaranteed to
# refer to the vertices of the graph that gets collapsed below.
# No random seed is set on purpose: the partition may differ between runs.
part3 = i_giant_graph.community_leiden(objective_function='modularity', n_iterations=10)
# Collapse every community into a single vertex to obtain the quotient graph.
quotient_graph = part3.cluster_graph(combine_edges=list)  # list here is helpful for collecting the DOIs
quotient_graph.summary()

'IGRAPH U--- 42 127 -- \n+ attr: edgeid (e)'

In [12]:
def _canonical_labels(membership):
    """Relabel a membership vector by order of first appearance.

    Two membership vectors describe the same partition exactly when their
    canonical forms are equal, no matter how the communities were numbered.
    """
    relabel = {}
    return [relabel.setdefault(label, len(relabel)) for label in membership]


# `part1 == part2` compares the two clustering objects themselves; the igraph
# clustering classes do not appear to define `__eq__` (verify when upgrading),
# so that test is False even for identical partitions. Compare the membership
# vectors instead, ignoring how the communities happen to be numbered.
_canonical_labels(part1.membership) == _canonical_labels(part2.membership)

False

In [13]:
# IPython introspection: show the signature and docstring of community_leiden.
# Note that the default objective_function is 'CPM', not 'modularity'.
g.community_leiden?

[0;31mSignature:[0m
[0mg[0m[0;34m.[0m[0mcommunity_leiden[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mobjective_function[0m[0;34m=[0m[0;34m'CPM'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mweights[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mresolution_parameter[0m[0;34m=[0m[0;36m1.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mbeta[0m[0;34m=[0m[0;36m0.01[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0minitial_membership[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_iterations[0m[0;34m=[0m[0;36m2[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnode_weights[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
community_leiden(objective_function=CPM, weights=None, 
resolution_parameter=1.0, beta=0.01, initial_membership=None,
n_iterations=2, node_weights=None)

Finds the community structure of the graph using the
Leiden algorithm of Traa