### In this notebook we will study the network scientist co-authorship data
1) Uncover communities with Louvain algorithm
2) Extract the local and global components
3) Study the distribution of communities in the composition of the global components
> - Hierarchy of global components
> - number of nodes
> - number of edges
> - the distribution of the edges each community shares with other communities
4) Uncover core-periphery from global component
5) Study the distribution of communities in the core of the global components
> - number of nodes
> - number of edges
> - the distribution of the edges each community shares with other communities
6) Identify the most important community in the network scientist co-authorship graph
7) Identify the most important nodes

In [2]:
# importation of modules
import cpnet
import math
import pandas as pd
import networkx as nx
from community import community_louvain
import matplotlib.pyplot as plt
from networkx.algorithms import community
import pyvis
from pyvis import network as net
import graph_tool.all as gt
import polars as pl
import json

## Class

In [3]:
# Classes and Functions Definition
class ComponentStructure:
    """Split a graph into per-community local components and a global component.

    A *local component* is the subgraph induced by the nodes of one community
    of ``partition``.  The *global component* is what is left of the graph
    after every intra-community edge, and every node that becomes isolated as
    a result, has been removed.
    """

    def __init__(self, graph, partition: dict):
        """Keep a private copy of ``graph`` and the node -> community ``partition``.

        Args:
            graph: networkx graph to analyse; it is copied, never mutated.
            partition: mapping from node to community id.
        """
        self.graph = graph.copy()
        self.partition = partition
        self.communities_id = self.__get_communities()
        self.local_components = []
        self.global_component = {}
        self.lc_density = {}

    def set_local_components(self):
        """Fill ``self.local_components`` with one entry per community.

        Each entry is ``{"lc": <induced subgraph>, "partition": <node -> id>}``.
        The list is rebuilt from scratch, so calling this method more than
        once does not accumulate duplicate entries.
        """
        # Group the nodes by community in a single pass instead of rescanning
        # the whole partition once per community.
        members = {}
        for node, community_id in self.partition.items():
            members.setdefault(community_id, {})[node] = community_id

        components = []
        for community_id in self.communities_id:
            part = members.get(community_id, {})
            components.append(
                {"lc": self.graph.subgraph(list(part)), "partition": part}
            )
        self.local_components = components
        return

    def remove_intra_community_edges(self):
        """Build ``self.global_component`` from the inter-community edges only.

        The result is ``{'gp': <graph>, 'part': <node -> id>}`` where ``gp`` is
        a copy of the graph without intra-community edges and without the
        nodes left isolated, and ``part`` is the partition restricted to the
        nodes that remain in ``gp``.
        """
        temp_graph = self.graph.copy()
        partition = self.partition
        # An edge is intra-community when both endpoints are in the partition
        # and share the same community id.  Collect first, then remove, so the
        # edge view is not modified while it is being iterated.
        intra_edges = [
            (u, v)
            for u, v in temp_graph.edges()
            if u in partition and v in partition and partition[u] == partition[v]
        ]
        temp_graph.remove_edges_from(intra_edges)
        temp_graph.remove_nodes_from(list(nx.isolates(temp_graph)))
        part = {k: v for k, v in partition.items() if k in temp_graph}
        self.global_component = {'gp': temp_graph, 'part': part}
        return

    def local_components_density(self):
        """Compute the density of every local component.

        Returns:
            ``self.lc_density``: mapping ``"lc<i>"`` -> density, where ``i`` is
            the 0-based position of the component in ``self.local_components``.
        """
        for i, local_component in enumerate(self.local_components):
            self.lc_density["lc" + str(i)] = nx.density(local_component["lc"])
        return self.lc_density

    def __get_communities(self) -> set:
        """Return the set of distinct community ids found in the partition."""
        return set(self.partition.values())

## Functions

In [4]:
def drawer(net, graph: nx.Graph, fixed: bool, partition=None):
    """Render ``graph`` with pyvis and write it to ``test.html``.

    Args:
        net: the ``pyvis.network`` module (anything exposing ``Network``).
        graph: networkx graph to draw.  When ``partition`` is given, the
            ``group`` and ``label`` attributes of its nodes are set in place.
        fixed: forwarded unchanged to ``Network.toggle_physics``.
        partition: optional node -> community id mapping used to group and
            label the nodes.

    Returns:
        Whatever ``Network.show("test.html")`` returns.
    """
    if partition is not None:
        for node in graph.nodes:
            community_id = partition[node]
            graph.nodes[node]['group'] = community_id
            graph.nodes[node]['label'] = str(community_id)
    grid = net.Network(notebook=True)
    grid.from_nx(graph)
    grid.toggle_physics(fixed)
    return grid.show("test.html")

def drawer_lc(net, graph: nx.Graph, fixed: bool, community_id=None, color=None):
    """Render one local component with pyvis and write it to ``test.html``.

    Args:
        net: the ``pyvis.network`` module (anything exposing ``Network``).
        graph: networkx graph to draw; node attributes are set in place.
        fixed: forwarded unchanged to ``Network.toggle_physics``.
        community_id: when given, every node is labelled with it.
        color: when given, every node is drawn in this colour.

    Returns:
        Whatever ``Network.show("test.html")`` returns.
    """
    # The guard used to test `partition`, which is not a parameter of this
    # function; the decoration now depends only on the arguments received.
    if community_id is not None or color is not None:
        for node in graph.nodes:
            if color is not None:
                graph.nodes[node]['color'] = color
            if community_id is not None:
                graph.nodes[node]['label'] = str(community_id)
    grid = net.Network(notebook=True)
    grid.from_nx(graph)
    grid.toggle_physics(fixed)
    return grid.show("test.html")

def important_of_node(graph, node):
    """Set the ``size`` attribute of ``node`` from its neighbours' groups.

    The size is 10 times the number of distinct ``group`` values found among
    the neighbours of ``node``.  Every neighbour must carry a ``group``
    attribute, otherwise ``KeyError`` is raised.

    Args:
        graph: networkx graph whose nodes carry a ``group`` attribute.
        node: the node to resize; its attributes are modified in place.
    """
    # Read only the neighbours' attributes rather than copying the data of
    # every node in the graph on each call.
    neighbor_groups = {
        graph.nodes[neighbor]['group'] for neighbor in nx.neighbors(graph, node)
    }
    graph.nodes[node]['size'] = 10 * len(neighbor_groups)
    return

def core_density(graph, partition):
    """Return the density of ``graph`` once the nodes labelled 0 are removed.

    ``partition`` maps each node to 0 or 1; the nodes mapped to 0 are dropped
    from a copy of the graph and the density of the remainder is returned.
    """
    to_drop = [node for node, label in partition.items() if label == 0]
    remaining = nx.Graph.copy(graph)
    remaining.remove_nodes_from(to_drop)
    return nx.density(remaining)

def periphery_density(graph, partition):
    """Return the density of ``graph`` once the nodes labelled 1 are removed.

    ``partition`` maps each node to 0 or 1; the nodes mapped to 1 are dropped
    from a copy of the graph and the density of the remainder is returned.
    """
    to_drop = [node for node, label in partition.items() if label == 1]
    remaining = nx.Graph.copy(graph)
    remaining.remove_nodes_from(to_drop)
    return nx.density(remaining)

def core_periphery_density(graph, partition):
    """Return the density of the edges running between core and periphery.

    The density is the number of edges with one endpoint labelled 1 (core)
    and the other labelled 0 (periphery), divided by the number of possible
    such pairs, ``n_core * n_periphery``.  Only nodes present in ``graph`` are
    counted.

    Args:
        graph: networkx graph.
        partition: mapping node -> 1 (core) or 0 (periphery).

    Returns:
        The density as a float, or ``0.0`` when the core or the periphery is
        empty (no pair is possible).
    """
    core_nodes = {n for n, label in partition.items() if label == 1 and n in graph}
    periphery_nodes = {n for n, label in partition.items() if label == 0 and n in graph}
    possible_pairs = len(core_nodes) * len(periphery_nodes)
    if possible_pairs == 0:
        return 0.0
    # Count edges, not nodes: an edge qualifies when its endpoints lie on
    # opposite sides of the core/periphery split.
    cross_edges = sum(
        1
        for u, v in graph.edges()
        if (u in core_nodes and v in periphery_nodes)
        or (u in periphery_nodes and v in core_nodes)
    )
    return cross_edges / possible_pairs

def densities(graph, partition):
    """Bundle the three block densities of a core-periphery partition.

    Returns a dict with the keys ``"core-core"``, ``"core-periphery"`` and
    ``"periphery-periphery"``.
    """
    summary = {}
    summary["core-core"] = core_density(graph, partition)
    summary["core-periphery"] = core_periphery_density(graph, partition)
    summary["periphery-periphery"] = periphery_density(graph, partition)
    return summary

def json_writer(data, path, file_name):
    """Serialise ``data`` as JSON into ``<path>/<file_name>.json``.

    Args:
        data: JSON-serialisable object.
        path: directory that receives the file.
        file_name: file name without the ``.json`` extension.

    Raises:
        TypeError: if ``data`` is not JSON-serialisable.
        OSError: if the file cannot be opened for writing.
    """
    # The context manager closes the handle even when json.dump raises.
    with open(path+"/"+file_name+".json", "w") as file:
        json.dump(data, file, indent=6)

In [5]:
# Load data: open the Excel workbook and parse its first sheet into a DataFrame
fil_name = '../data/edge_list.xlsx'
dataset = pd.ExcelFile(fil_name)
sheets = dataset.sheet_names
# Only the first sheet of the workbook is read.
data = dataset.parse(sheet_name=sheets[0])
# Last expression of the cell: the notebook displays the first rows.
data.head()

Unnamed: 0,edge_size,source_node,target_node
0,2,"Franchi, Enrico","Franchi, Enrico"
1,1,"Mili, Fatma","Mohan, Srikar"
2,1,"Mili, Fatma","Qu, Guangzhi"
3,1,"Mohan, Srikar","Qu, Guangzhi"
4,4,"Gao, Liang","Gao, Zi-You"


In [13]:
# Build an undirected graph from the edge list. No edge_attr argument is
# passed, so only the source_node/target_node columns are used here.
Root_graph = nx.from_pandas_edgelist(data, source='source_node', target='target_node', create_using=nx.Graph())
# Report the raw size before any clean-up.
print("Number of edges before sanitize "+str(len(Root_graph.edges())))
print("Number of nodes before sanitize "+str(len(Root_graph.nodes())))

Number of edges before sanitize 359584
Number of nodes before sanitize 56646


In [7]:
# Step 1: remove self-loops, printing their count before and after removal.
selfloop_edges = list(nx.selfloop_edges(Root_graph))
print("We have  "+str(len(selfloop_edges))+ " selfloops in the graph")
Root_graph.remove_edges_from(selfloop_edges)
selfloop_edges = list(nx.selfloop_edges(Root_graph))
print("We have  "+str(len(selfloop_edges))+ " selfloops in the graph")

# Step 2: remove isolated nodes. This runs after the self-loop removal, so
# nodes whose only edge was a self-loop are dropped here as well.
isolates_nodes = list(nx.isolates(Root_graph))
print("We have  "+str(len(isolates_nodes))+ " isolates nodes in the graph")
Root_graph.remove_nodes_from(isolates_nodes)
isolates_nodes = list(nx.isolates(Root_graph))
print("We have  "+str(len(isolates_nodes))+ " isolates nodes in the graph")
# Report the size of the cleaned graph.
print("Number of edges after sanitize "+str(len(Root_graph.edges())))
print("Number of nodes after sanitize "+str(len(Root_graph.nodes())))

We have  1999 selfloops in the graph
We have  0 selfloops in the graph
We have  897 isolates nodes in the graph
We have  0 isolates nodes in the graph
Number of edges after sanitize 357585
Number of nodes after sanitize 55749


In [7]:
# Store the sanitized graph in node-link JSON form as ../data/co_authorship_graph.json
graph_json = nx.node_link_data(Root_graph)
json_writer(graph_json, "../data", "co_authorship_graph")

#### 1 Uncovering communities

In [8]:
# Detect communities on the sanitized graph with python-louvain.
# NOTE(review): randomize=True is used without a random_state, so the
# partition presumably differs from run to run — confirm if reproducibility matters.
communities_partition = community_louvain.best_partition(Root_graph, resolution=1.0, randomize=True)
# Number of distinct community ids in the partition.
number_of_communities = len(set(communities_partition.values()))
print("Number of communities is: "+ str(number_of_communities))

Number of communities is: 4961


In [9]:
# Persist the Louvain partition as JSON under ../data/partitions
json_writer(communities_partition, "../data/partitions", "co_authorship_communities_louvain_partitions")

In [9]:
# Build the local components (one induced subgraph per community) and the
# global component (graph without intra-community edges and isolated nodes).
Component_Structure = ComponentStructure(Root_graph, communities_partition)
Component_Structure.set_local_components()
Component_Structure.remove_intra_community_edges()

In [11]:
# Shortcuts to the global component graph and to its restricted partition
gc = Component_Structure.global_component["gp"]
gc_partitions = Component_Structure.global_component["part"]

In [12]:
# Store the global component in node-link JSON form
gc_json = nx.node_link_data(gc)
json_writer(gc_json, "../data", "global_component_co_authorship")

In [13]:
# Store the partition restricted to the global component.
# NOTE(review): the file name is spelled "gloabla_..."; it is left unchanged
# because anything reading this file presumably relies on the current name.
json_writer(gc_partitions, "../data/partitions", "gloabla_component_co_authorship_partitions")

In [14]:
# Write every local component and its partition to disk; file names are
# suffixed with a 1-based running index (position in local_components + 1).
index = 1
for lc_component in Component_Structure.local_components:
    lc_json = nx.node_link_data(lc_component["lc"])
    lc_partition = lc_component["partition"]
    json_writer(lc_json, "../data", "local_component_co_authorship"+str(index))
    json_writer(lc_partition, "../data/partitions", "local_component_co_authorship_partition"+str(index))
    index = index + 1

In [15]:
# Write every connected component of the global component, together with the
# matching slice of the partition, using a 1-based running index in file names.
gc_list = list(nx.connected_components(gc))
index = 1
for gc_node in gc_list:
    gc_c = nx.subgraph(gc, gc_node)
    gc_c_json = nx.node_link_data(gc_c)
    # gc_node is the node set of this component, so membership is tested
    # directly against it instead of rebuilding a node list for every key.
    pa = {k:v for k,v in gc_partitions.items() if k in gc_node}
    json_writer(gc_c_json, "../data", "global_component_co_authorship"+str(index))
    json_writer(pa, "../data/partitions", "global_component_co_authorship_partition"+str(index))
    index = index + 1

#### Study the distribution of communities in the global components

In [16]:
def extract_global_component(gc):
    """Count nodes and edges of every connected component of ``gc``.

    Returns:
        A pair ``(node_counts, edge_counts)`` of dicts keyed ``"gc_1"``,
        ``"gc_2"``, ... in the order networkx yields the components.
    """
    node_counts = {}
    edge_counts = {}
    components = list(nx.connected_components(gc))
    for position, members in enumerate(components, start=1):
        label = "gc_" + str(position)
        component = gc.subgraph(members)
        node_counts[label] = len(component.nodes)
        edge_counts[label] = len(component.edges)
    return node_counts, edge_counts

def communities_nodes_count(gc, partitions):
    """Count the nodes of each community.

    Args:
        gc: unused; kept so the signature matches the sibling helpers.
        partitions: mapping node -> community id.

    Returns:
        Dict ``"community_<i>"`` -> number of nodes, with one entry per
        distinct community.  ``i`` is 1-based and follows the order in which
        the communities first appear in ``partitions``.
    """
    # One pass over the partition; the dict keeps first-appearance order.
    sizes = {}
    for community_id in partitions.values():
        sizes[community_id] = sizes.get(community_id, 0) + 1
    return {
        "community_" + str(position): size
        for position, size in enumerate(sizes.values(), start=1)
    }

def edges_count_of_community(gc, community_id, partitions):
    """Sum the neighbour counts of the nodes belonging to ``community_id``.

    Every node of the community contributes its number of neighbours in
    ``gc``; the total is returned (0 when the community has no node).
    """
    members = [node for node, label in partitions.items() if label == community_id]
    return sum(len(list(nx.neighbors(gc, member))) for member in members)

def edges_count_of_communities(gc, partitions):
    """Sum, for each community, the neighbour counts of its nodes in ``gc``.

    Args:
        gc: networkx graph containing every node of ``partitions``.
        partitions: mapping node -> community id.

    Returns:
        Dict ``"comm_<i>"`` -> total, with one entry per distinct community.
        ``i`` is 1-based and follows the order in which the communities first
        appear in ``partitions``.
    """
    # Accumulate per community in one pass over the nodes.
    totals = {}
    for node, community_id in partitions.items():
        neighbor_count = len(list(nx.neighbors(gc, node)))
        totals[community_id] = totals.get(community_id, 0) + neighbor_count
    return {
        "comm_" + str(position): total
        for position, total in enumerate(totals.values(), start=1)
    }

def numbers_of_communities_connected(gc, community_id, partitions):
    """Return the community ids found among the neighbours of a community.

    Args:
        gc: networkx graph containing the nodes of the community.
        community_id: id of the community to inspect.
        partitions: mapping node -> community id.

    Returns:
        Set of the community ids of every neighbour (in ``gc``) of every node
        of ``community_id``.  Neighbours absent from ``partitions`` are ignored.
    """
    comm_neighbors = set()
    for node, node_community in partitions.items():
        if node_community != community_id:
            continue
        # Look each neighbour up directly instead of filtering the whole
        # partition once per node.
        for neighbor in nx.neighbors(gc, node):
            if neighbor in partitions:
                comm_neighbors.add(partitions[neighbor])
    return comm_neighbors

def connected_comm_count(gc, partitions):
    """Report, for each community, the communities its nodes are linked to.

    Args:
        gc: networkx graph containing every node of ``partitions``.
        partitions: mapping node -> community id.

    Returns:
        A pair of dicts keyed ``"comm_<i>"``, one entry per distinct community
        (``i`` is 1-based, in order of first appearance in ``partitions``):
        the first maps to the *number* of neighbouring community ids, the
        second to the *set* of those ids.  Neighbours absent from
        ``partitions`` are ignored.
    """
    # Gather the neighbouring community ids of every community in one pass.
    linked_by_community = {}
    for node, community_id in partitions.items():
        linked = linked_by_community.setdefault(community_id, set())
        for neighbor in nx.neighbors(gc, node):
            if neighbor in partitions:
                linked.add(partitions[neighbor])

    c_connected_communities = {}
    c_connected_communities_count = {}
    for position, linked in enumerate(linked_by_community.values(), start=1):
        key = "comm_" + str(position)
        # The historical naming is kept: the "_count" dict holds the sets and
        # the other one the sizes; the return order is unchanged for callers.
        c_connected_communities_count[key] = linked
        c_connected_communities[key] = len(linked)
    return c_connected_communities, c_connected_communities_count

In [18]:
# Run cpnet's BE core-periphery detection on the whole global component.
algo = cpnet.BE()
algo.detect(gc)
# x: per-node coreness; c: per-node pair id (presumably dicts keyed by node —
# verify against the cpnet documentation).
x = algo.get_coreness()
c = algo.get_pair_id()

In [19]:
# Cast every coreness value to a plain int, in place
# (presumably so that json can serialise it — confirm).
for k in x:
    x[k] = int(x[k])

In [20]:
# Store the coreness of the whole global component as JSON
json_writer(x, "../data/core_periphery_partitions", "global_component_cp")

In [21]:
# Run core-periphery detection separately on every connected component of the
# global component and store each coreness mapping in its own file.
gc_list = list(nx.connected_components(gc))
# NOTE(review): the suffix starts at 0 here, while the cell that writes the
# "global_component_co_authorship<index>" files starts at 1, so the numbers in
# the two families of file names are shifted by one — confirm this is intended.
index = 0
for gc_node in gc_list:
    gc_c = nx.subgraph(gc, gc_node)
    algo = cpnet.BE()
    algo.detect(gc_c)
    x = algo.get_coreness()
    c = algo.get_pair_id()
    # Cast coreness values to plain int before writing them out.
    for k in x:
        x[k] = int(x[k])
    json_writer(x, "../data/core_periphery_partitions", "global_component_cp"+str(index))
    index = index + 1

In [12]:
# Run core-periphery detection on every local component and store each
# coreness mapping in its own file, suffixed with a 1-based running index.
index = 1
for lc in Component_Structure.local_components:
    algo = cpnet.BE()
    algo.detect(lc['lc'])
    x = algo.get_coreness()
    c = algo.get_pair_id()
    # Cast coreness values to plain int before writing them out.
    for k in x:
        x[k] = int(x[k])
    json_writer(x, "../data/core_periphery_partitions", "local_component_cp"+str(index))
    index = index + 1