In [1]:
%matplotlib inline

import networkx as nx
import pandas as pd
import numpy as np
import scipy as sp

from tqdm import tqdm

import itertools
import sys

sys.path.append("../..")
from networkentropy import network_energy as ne, network_energy_gradient as neg, network_utils as nu

## Correlaction between energy gradient and length of the shortest path for all node pairs
### For sythetic networks

In [2]:
results = []
graphs = []
methods = ['graph', 'laplacian', 'randic']
generators = ['random', 'smallworld', 'waxman', 'powerlaw']
p_max = 4
p_values = list(range(1,p_max))

def create_graph(p, generator, p_max, num_nodes=100):
    if generator == 'random':
        return nx.erdos_renyi_graph(n=num_nodes, p=p/(p_max*10))
    elif generator == 'smallworld':
        return nx.watts_strogatz_graph(n=num_nodes, k=4, p=p/p_max)
    elif generator == 'waxman':
        return nx.waxman_graph(n=num_nodes, alpha=p/p_max, beta=0.1)
    elif generator == 'powerlaw':
        return nx.powerlaw_cluster_graph(n=num_nodes, m=3, p=p/(p_max*10))
    else:
        raise ValueError('Generator: {} does not exist'.format(generator))
        
p_values_generators = list(itertools.product(p_values, generators))
for p, generator in tqdm(p_values_generators):
    G = create_graph(p, generator, p_max)
    graphs.append((p, generator, G))
    shortest_path_lengths = nx.all_pairs_shortest_path_length(G)
    decorated_graph = neg.get_graph_with_energy_data(G, methods, copy=False)
    for lengths in shortest_path_lengths:
        source = lengths[0]
        for target, length in lengths[1].items():
            for method in methods:
                gradient = decorated_graph.get_gradient(source, target, method)
                results.append((p, generator, method, source, target, length, gradient))
                
p, generator, graph = list(zip(*graphs))
graphs_df = pd.DataFrame({'p': p, 'generator': generator, 'graph': graph})
            
p, generator, method, source, target, length, gradient = map(list, zip(*results))
df = pd.DataFrame({'p': p, 'generator': generator, 'method': method, 'source': source, 'target': target, 
                   'length': length, 'gradient': gradient})

100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:05<00:00,  2.27it/s]


In [3]:
correlations = []
for p, generator, method in itertools.product(p_values, generators, methods):
    sub_df = df.query("p=='{}' and generator=='{}' and method=='{}'".format(p, generator, method))
    #leave out negative equivalents
    sub_df = sub_df.query("target > source")
    sub_df = sub_df.assign(abs_gradient=np.abs(sub_df.gradient))
    x = list(sub_df.length)
    y = list(sub_df.abs_gradient)
    corr = np.corrcoef(x, y)
    correlations.append((p, generator, method, corr[0][1]))

In [4]:
p, generator, method, corr = list(zip(*correlations))
corr_df = pd.DataFrame({'p': p, 'generator': generator, 'method': method, 'corr': corr})

In [5]:
corr_df.sort_values('corr')

Unnamed: 0,corr,generator,method,p
22,-0.371548,powerlaw,laplacian,2
34,-0.369942,powerlaw,laplacian,3
21,-0.363794,powerlaw,graph,2
23,-0.363434,powerlaw,randic,2
33,-0.360462,powerlaw,graph,3
35,-0.352214,powerlaw,randic,3
10,-0.330121,powerlaw,laplacian,1
11,-0.316963,powerlaw,randic,1
9,-0.315945,powerlaw,graph,1
20,-0.202091,waxman,randic,2


### For empirical networks

In [6]:
datasets = nu.create_datasets('konect.cc').filter(min_size=50, max_size=400, max_density=0.3)

In [7]:
datasets.to_df().head()

Unnamed: 0,name,category,num_nodes,num_edges,tsv_url
11,iceland,Human contact network,75,114,http://konect.cc/files/download.tsv.iceland.ta...
15,dolphins,Animal network,62,159,http://konect.cc/files/download.tsv.dolphins.t...
17,brunson_revolution,Affiliation network,141,160,http://konect.cc/files/download.tsv.brunson_re...
18,edit-gnwikibooks,Authorship network,68,62,http://konect.cc/files/download.tsv.edit-gnwik...
19,edit-rmwikibooks,Authorship network,70,67,http://konect.cc/files/download.tsv.edit-rmwik...


In [8]:
networks = datasets.download_and_build_networks('data/')

100% [..............................................................................] 14735 / 14735

In [9]:
len(networks)

99

In [10]:
results = []
for row, g in tqdm(list(zip(datasets.to_df().iterrows(), networks))):
    data = row[1]
    shortest_path_lengths = nx.all_pairs_shortest_path_length(g)
    decorated_graph = neg.get_graph_with_energy_data(g, methods, copy=False)
    for method in methods:
        x = []
        y = []
        for lengths in shortest_path_lengths:
            source = lengths[0]
            for target, length in lengths[1].items():
                if target > source:
                    gradient = np.abs(decorated_graph.get_gradient(source, target, method))
                    x.append(gradient)
                    y.append(length)
        if len(x) > 0:
            corr = np.corrcoef(x, y)[0][1]
            pearson = sp.stats.pearsonr(x, y)
            spearman = sp.stats.spearmanr(x, y)
            kendall = sp.stats.kendalltau(x, y)
            results.append((data['name'], data['category'], method, corr, pearson, spearman, kendall))

100%|██████████████████████████████████████████████████████████████████████████████████| 99/99 [04:18<00:00,  2.61s/it]


In [11]:
name, category, method, corr, pearson, spearman, kendall = list(zip(*results))

In [12]:
empirical_corrs_df = pd.DataFrame({'name': name, 'category': category, 'method': method,
                                   'corr': corr, 'pearson': pearson, 'spearman': spearman, 'kendall': kendall})

In [13]:
empirical_corrs_df.sort_values('corr')

Unnamed: 0,category,corr,kendall,method,name,pearson,spearman
3,Authorship network,-0.523494,"(-0.566629023334, 0.0)",graph,edit-gnwikibooks,"(-0.523493725325, 2.32082396477e-319)","(-0.669582715545, 0.0)"
26,Authorship network,-0.512662,"(-0.501487699686, 0.0)",graph,edit-nahwikibooks,"(-0.512662251746, 0.0)","(-0.595273375242, 0.0)"
5,Authorship network,-0.510824,"(-0.488994938551, 2.54880028662e-261)",graph,edit-angwikisource,"(-0.510824348415, 3.20068923876e-220)","(-0.596267440093, 5.39737074143e-319)"
9,Authorship network,-0.507760,"(-0.555734015272, 0.0)",graph,edit-biwikibooks,"(-0.507760074235, 0.0)","(-0.660094532888, 0.0)"
12,Authorship network,-0.506337,"(-0.630421687091, 0.0)",graph,edit-mhwiktionary,"(-0.506337305231, 0.0)","(-0.737882001876, 0.0)"
7,Authorship network,-0.500790,"(-0.609983350428, 0.0)",graph,edit-bmwikibooks,"(-0.500790475629, 0.0)","(-0.717311389651, 0.0)"
20,Authorship network,-0.499985,"(-0.559503646696, 0.0)",graph,edit-kswikibooks,"(-0.499985001127, 0.0)","(-0.655207885762, 0.0)"
37,Authorship network,-0.495902,"(-0.602297802114, 0.0)",graph,edit-xhwiktionary,"(-0.495901584223, 0.0)","(-0.703925647899, 0.0)"
4,Authorship network,-0.495204,"(-0.549484190994, 0.0)",graph,edit-rmwikibooks,"(-0.495204129713, 1.19434295958e-304)","(-0.648580667898, 0.0)"
8,Authorship network,-0.484524,"(-0.584069484953, 0.0)",graph,edit-bmwikiquote,"(-0.48452419153, 0.0)","(-0.695063154459, 0.0)"
