In [1]:
import networkx as nx
import numpy as np

# Matplotlib is required for displaying figures within the notebook
import matplotlib 
import matplotlib.pyplot as plt


In [7]:
import sys
sys.path.insert(0,'..')
from src.data.create_stochastic_block_model import create_louvain_communities_dict,store_in_file,create_graph_and_node_mappings_from_file
from src.data.data_loader import GraphDataset
from torch_geometric.utils import to_networkx

In [2]:
# SOURCE: https://github.com/jeroenvldj/bow-tie_detection/blob/master/Detecting_Bow-tie_structures_in_directed_networks.ipynb
def _reachable_from(neighbors, sources):
    '''Return the set of nodes reachable from any node in `sources`.

    neighbors: callable mapping a node to an iterable of adjacent nodes
               (G.successors for forward reachability, G.predecessors
               for backward reachability)
    sources:   iterable of start nodes; they are part of the result
    '''
    seen = set(sources)
    stack = list(seen)
    while stack:
        node = stack.pop()
        for neighbor in neighbors(node):
            if neighbor not in seen:
                seen.add(neighbor)
                stack.append(neighbor)
    return seen


def get_bowtie_components(graph):
    '''Classifying the nodes of a network into a bow-tie structure.
    Here we follow the paper:
    "Bow-tie Decomposition in Directed Graphs" - Yang et al. IEEE (2011)

    input:  NetworkX directed graph or numpy adjacency matrix
    output: tuple (S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER) of
            node sets (following the NetworkX input graph node labelling
            or labelled according to the order of the adjacency matrix
            [0, n-1]):
              S           - largest strongly connected component
              IN          - nodes outside S that can reach S
              OUT         - nodes outside S that are reachable from S
              TUBES       - remaining nodes reachable from IN that reach OUT
              INTENDRILS  - remaining nodes reachable from IN only
              OUTTENDRILS - remaining nodes that reach OUT only
              OTHER       - remaining nodes with neither property
            For a graph without nodes all seven sets are empty.
    raises: AssertionError if `graph` is neither a NetworkX DiGraph nor a
            numpy array/matrix.
    '''
    import networkx as nx

    # Verify graph input format. isinstance also accepts subclasses;
    # np.matrix is a subclass of np.ndarray and is therefore covered.
    assert isinstance(graph, (nx.DiGraph, np.ndarray)), 'Input should be a NetworkX directed graph or numpy adjacency matrix'
    if isinstance(graph, nx.DiGraph):
        # The graph is only read below, so no defensive copy is needed.
        G = graph
    else:
        # nx.from_numpy_matrix was removed in NetworkX 3.0;
        # from_numpy_array is its replacement.
        G = nx.from_numpy_array(np.asarray(graph), create_using=nx.DiGraph())

    # max() on an empty sequence would raise, so handle the empty graph here.
    if G.number_of_nodes() == 0:
        return set(), set(), set(), set(), set(), set(), set()

    # Core: the largest strongly connected component.
    S = max(nx.strongly_connected_components(G), key=len)

    # Every node of S reaches every other one, so a single representative
    # is enough to find everything downstream (OUT) and upstream (IN) of S.
    v_any = next(iter(S))
    OUT = _reachable_from(G.successors, [v_any]) - S
    IN = _reachable_from(G.predecessors, [v_any]) - S
    V_rest = set(G.nodes()) - S - OUT - IN

    # Two multi-source traversals replace one forward and one backward
    # search per remaining node: a node is reachable from IN iff it lies in
    # the forward closure of IN, and it reaches OUT iff it lies in the
    # backward closure of OUT.
    reachable_from_in = _reachable_from(G.successors, IN)
    reaches_out = _reachable_from(G.predecessors, OUT)

    TUBES = set()
    INTENDRILS = set()
    OUTTENDRILS = set()
    OTHER = set()
    for v in V_rest:
        irv = v in reachable_from_in   # some IN node reaches v
        vro = v in reaches_out         # v reaches some OUT node
        if irv and vro:
            TUBES.add(v)
        elif irv:
            INTENDRILS.add(v)
        elif vro:
            OUTTENDRILS.add(v)
        else:
            OTHER.add(v)

    return S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER


In [15]:
def get_node_colors(G):
    '''Return one colour name per node of G, in the order of G.nodes.

    Colours follow the bow-tie decomposition computed by
    get_bowtie_components: IN -> 'blue', core (S) -> 'red',
    OUT -> 'green', every other node -> 'grey'.
    '''
    core, in_nodes, out_nodes, *_ = get_bowtie_components(G)

    def colour_of(node):
        # Membership is checked in the order IN, core, OUT.
        if node in in_nodes:
            return 'blue'
        if node in core:
            return 'red'
        if node in out_nodes:
            return 'green'
        return 'grey'

    return [colour_of(node) for node in G.nodes]


In [16]:

def bow_tie_vis(G, title='Bow-tie structure of random graph'):
    '''Draw G in a shell layout with nodes coloured by bow-tie component.

    G:     graph accepted by get_node_colors and nx.draw_shell
    title: figure title; the default keeps the previous caption (with its
           typo fixed), so existing calls bow_tie_vis(G) still work

    A new matplotlib figure is opened and shown; nothing is returned.
    '''
    plt.figure()
    node_colors = get_node_colors(G)

    nx.draw_shell(G, node_color=node_colors, arrowsize=20)
    plt.title(title)
    # Colour key, one figtext line per colour used by get_node_colors.
    plt.figtext(1, 0.6, "IN: nodes linking into the core", fontsize=15, color='blue')
    plt.figtext(1, 0.5, "Core nodes", fontsize=15, color='red')
    plt.figtext(1, 0.4, 'OUT: nodes reached from the core', fontsize=15, color='green')
    # Grey nodes were drawn but previously had no entry in the key.
    plt.figtext(1, 0.3, 'Other nodes (outside IN, core and OUT)', fontsize=15, color='grey')
    plt.show()

In [11]:
# Names of the datasets iterated over by the analysis cells below.
datasets = 'cora citeseer pubmed cora_full'.split()

In [21]:
# Print the size of every bow-tie component for each dataset, using the
# graph built from the processed .cites / .content files.
for dataset_name in datasets:
    dataset = GraphDataset(
        f'../data/tmp/{dataset_name}_directed',
        dataset_name,
        f'../data/graphs/processed/{dataset_name}/{dataset_name}.cites',
        f'../data/graphs/processed/{dataset_name}/{dataset_name}.content',
        directed=True,
        reverse=False,
    )[0]
    G = to_networkx(dataset)
    S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER = get_bowtie_components(G)
    d = dict(S=S, IN=IN, OUT=OUT, TUBES=TUBES,
             INTENDRILS=INTENDRILS, OUTTENDRILS=OUTTENDRILS, OTHER=OTHER)
    print(f'\n{dataset_name}')
    for group, members in d.items():
        print(f'|{group}| = {len(members)}')



cora
|S| = 13
|IN| = 343
|OUT| = 26
|TUBES| = 121
|INTENDRILS| = 299
|OUTTENDRILS| = 728
|OTHER| = 955

citeseer
|S| = 10
|IN| = 8
|OUT| = 10
|TUBES| = 1
|INTENDRILS| = 1
|OUTTENDRILS| = 54
|OTHER| = 2026

pubmed
|S| = 2
|IN| = 2153
|OUT| = 0
|TUBES| = 0
|INTENDRILS| = 2229
|OUTTENDRILS| = 0
|OTHER| = 15333

cora_full
|S| = 2268
|IN| = 8238
|OUT| = 1652
|TUBES| = 1292
|INTENDRILS| = 1357
|OUTTENDRILS| = 2784
|OTHER| = 1112


In [22]:
# Same report as for the original graphs, but the edge list is read from the
# "sbm" directory (file name suffix _sbm_{i}).
for dataset_name in datasets:
    i = 0  # index used in the sbm file name; presumably the sample number
    dataset = GraphDataset(
        f'../data/tmp/{dataset_name}_directed-sbm{i}',
        dataset_name,
        f'../data/graphs/sbm/{dataset_name}/{dataset_name}_sbm_{i}.cites',
        f'../data/graphs/processed/{dataset_name}/{dataset_name}.content',
        directed=True,
        reverse=False,
    )[0]
    G = to_networkx(dataset)
    S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER = get_bowtie_components(G)
    d = dict(S=S, IN=IN, OUT=OUT, TUBES=TUBES,
             INTENDRILS=INTENDRILS, OUTTENDRILS=OUTTENDRILS, OTHER=OTHER)
    print(f'\n{dataset_name}')
    for group, members in d.items():
        print(f'|{group}| = {len(members)}')


cora
|S| = 1389
|IN| = 485
|OUT| = 396
|TUBES| = 9
|INTENDRILS| = 38
|OUTTENDRILS| = 61
|OTHER| = 107

citeseer
|S| = 412
|IN| = 471
|OUT| = 121
|TUBES| = 24
|INTENDRILS| = 300
|OUTTENDRILS| = 200
|OTHER| = 582

pubmed
|S| = 11541
|IN| = 3257
|OUT| = 3150
|TUBES| = 222
|INTENDRILS| = 387
|OUTTENDRILS| = 383
|OTHER| = 777

cora_full
|S| = 16347
|IN| = 1179
|OUT| = 1006
|TUBES| = 11
|INTENDRILS| = 31
|OUTTENDRILS| = 28
|OTHER| = 101


In [23]:
# Same report as for the original graphs, but the edge list is read from the
# "confmodel" directory (file name suffix _confmodel_{i}).
for dataset_name in datasets:
    i = 0  # index used in the confmodel file name; presumably the sample number
    dataset = GraphDataset(
        f'../data/tmp/{dataset_name}_directed-confmodel{i}',
        dataset_name,
        f'../data/graphs/confmodel/{dataset_name}/{dataset_name}_confmodel_{i}.cites',
        f'../data/graphs/processed/{dataset_name}/{dataset_name}.content',
        directed=True,
        reverse=False,
    )[0]
    G = to_networkx(dataset)
    S, IN, OUT, TUBES, INTENDRILS, OUTTENDRILS, OTHER = get_bowtie_components(G)
    d = dict(S=S, IN=IN, OUT=OUT, TUBES=TUBES,
             INTENDRILS=INTENDRILS, OUTTENDRILS=OUTTENDRILS, OTHER=OTHER)
    print(f'\n{dataset_name}')
    for group, members in d.items():
        print(f'|{group}| = {len(members)}')


cora
|S| = 259
|IN| = 1431
|OUT| = 228
|TUBES| = 102
|INTENDRILS| = 254
|OUTTENDRILS| = 135
|OTHER| = 76

citeseer
|S| = 24
|IN| = 594
|OUT| = 85
|TUBES| = 134
|INTENDRILS| = 572
|OUTTENDRILS| = 285
|OTHER| = 416

pubmed
|S| = 716
|IN| = 7369
|OUT| = 1795
|TUBES| = 239
|INTENDRILS| = 709
|OUTTENDRILS| = 7618
|OTHER| = 1271

cora_full
|S| = 7038
|IN| = 8823
|OUT| = 1809
|TUBES| = 105
|INTENDRILS| = 390
|OUTTENDRILS| = 465
|OTHER| = 73
