In [1]:
%matplotlib inline
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import scipy as sp
import scipy.io

In [2]:
from utils import *

In [3]:
cd synth/

/home/spiaggesi/disene/exps/synth


# DiSeNE 

In [4]:
#model:    "isgc" (DiSe-GAE), "imlp" (DiSe-FCAE)
#dataset:  "ring_of_cliques", "stochastic_block_model", "ba_cliques", "er_cliques"

In [5]:
# Run exp_disene.py once (--runs=1) with the 'isgc' model on the 'ring_of_cliques'
# dataset; the log below reports which embeddings and metric files were saved.
!python exp_disene.py --runs=1 --model='isgc' --dataset='ring_of_cliques'

ring_of_cliques_32_10 ISGC 128 128 0 embeddings saved
ring_of_cliques_32_10 ISGC 128 128 0 dim. metrics saved
ring_of_cliques_32_10 ISGC 128 128 0 pos. metrics saved
ring_of_cliques_32_10 ISGC 128 128 0 task metrics saved


In [6]:
# Hidden and output sizes of the run to analyse (both 128); they are
# interpolated into the result file names loaded in the following cells.
D_hid = D_out = 128

In [7]:
# Open the '.scores.npz' and '.spath.npz' archives of run 0 from linearshap_metrics/
# (file names are selected by D_hid and D_out).
# Keys read later in this notebook: 'f1_weighted', 'dim_ent_weighted',
# 'jaccard_score', 'edge_index' from emb_scores and 'sum' from emb_dist.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays; only point
# these paths at archives produced by your own runs.
emb_scores = np.load(f'../../output/synth/isgc-walks-featureless-1hops-5window/linearshap_metrics/ring_of_cliques_32_10_ISGC_{D_hid}dims_{D_out}dims.0.scores.npz', allow_pickle=True)
emb_dist = np.load(f'../../output/synth/isgc-walks-featureless-1hops-5window/linearshap_metrics/ring_of_cliques_32_10_ISGC_{D_hid}dims_{D_out}dims.0.spath.npz', allow_pickle=True)

In [8]:
# Load the saved embedding array (Z) and the '.masks.npz' array of run 0;
# both are stored under the default 'arr_0' key of their archives.
# Z is later indexed as Z[rows][:, dims], i.e. it is used as a 2-D array.
Z = np.load(f'../../output/synth/isgc-walks-featureless-1hops-5window/ring_of_cliques_32_10_ISGC_{D_hid}dims_{D_out}dims.0.npz', allow_pickle=True)['arr_0']
emb_masks = np.load(f'../../output/synth/isgc-walks-featureless-1hops-5window/linearshap_metrics/ring_of_cliques_32_10_ISGC_{D_hid}dims_{D_out}dims.0.masks.npz', allow_pickle=True)['arr_0']

In [9]:
# Load the three 'linear_*' archives of run 0 from shap_metrics/. i_task and
# i_scores stay as opened archives, i_masks is the 'arr_0' array; all three are
# passed to compute_plausibility_score further down.
i_task = np.load(f'../../output/synth/isgc-walks-featureless-1hops-5window/shap_metrics/ring_of_cliques_32_10_ISGC_{D_hid}dims_{D_out}dims.0.linear_task.npz', allow_pickle=True)
i_scores = np.load(f'../../output/synth/isgc-walks-featureless-1hops-5window/shap_metrics/ring_of_cliques_32_10_ISGC_{D_hid}dims_{D_out}dims.0.linear_scores.npz', allow_pickle=True)
i_masks = np.load(f'../../output/synth/isgc-walks-featureless-1hops-5window/shap_metrics/ring_of_cliques_32_10_ISGC_{D_hid}dims_{D_out}dims.0.linear_masks.npz', allow_pickle=True)['arr_0']

#### Comprehensibility and Sparsity

In [10]:
# Comprehensibility: maximum of 'f1_weighted' along axis 0, then the mean of those maxima.
best_f1 = emb_scores['f1_weighted'].max(axis=0)
print('comprehensibility: ', round(best_f1.mean(), 4))
# Sparsity: one minus the mean of 'dim_ent_weighted'.
mean_dim_entropy = emb_scores['dim_ent_weighted'].mean()
print('sparsity: ', round(1.-mean_dim_entropy, 4))

comprehensibility:  0.9676
sparsity:  0.4756


#### Overlap consistency

In [11]:
from scipy.stats import pearsonr
from scipy.spatial.distance import pdist, squareform
def compute_correlation(x,y):
    """Absolute Pearson correlation between two samples.

    Returns 0. when `x` has fewer than two entries. Otherwise returns |r|
    from scipy.stats.pearsonr(x, y), with NaN replaced by 0 via np.nan_to_num.

    NOTE(review): this helper is repeated verbatim in each section of the
    notebook; consider defining it once.
    """
    # Guard clause: too few observations to correlate.
    if x.shape[0] < 2:
        return 0.
    r_value = pearsonr(x, y)[0]
    return np.nan_to_num(np.abs(r_value))
        
# Absolute value of (1 - pdist 'correlation' distance) for every pair of columns
# of Z, with NaNs replaced by 0.
pairwise_dist = pdist(Z.T, 'correlation')
z_corr = np.nan_to_num(np.abs(1. - pairwise_dist))
# Keep only the pairs whose value is non-zero, in both z_corr and 'jaccard_score'.
nonzero_pairs = z_corr != 0.
overlap_consistency = compute_correlation(z_corr[nonzero_pairs], emb_scores['jaccard_score'][nonzero_pairs])
print('overlap consistency: ', round(overlap_consistency, 4))

overlap consistency:  0.9706


#### Positional Coherence

In [12]:
# Keep the rows of Z listed in 'edge_index' and the columns that have at least
# one mask entry above THRESH, then transpose so each row is one kept column.
# NOTE(review): this rebinds Z, so re-running the cell without reloading Z
# slices the already-reduced array.
edge_idx = emb_scores['edge_index']
dims, edges = np.nonzero(emb_masks > THRESH)
Z = Z[np.unique(edge_idx)][:, np.unique(dims)].T

In [13]:
# Positional coherence: mean |correlation| between each row of Z and the
# matching row of emb_dist['sum'], divided by the same quantity computed with
# randomly permuted row pairings (5 repetitions, THRESH added to the baseline).
dist_sum = emb_dist['sum']  # read once: an opened .npz re-reads the array on every key access
n_dims = Z.shape[0]
rng = np.random.default_rng(0)  # fixed seed so the printed value is reproducible
d_corr = np.array([compute_correlation(Z[d], dist_sum[d]) for d in range(n_dims)])
d_corr_rand = np.array([[compute_correlation(Z[d], dist_sum[j])
                         for d, j in zip(range(n_dims), rng.permutation(n_dims))]
                        for _ in range(5)])
print('positional coherence: ', round(np.mean(d_corr)/np.mean(d_corr_rand+THRESH), 4))

positional coherence:  6.7399


#### Plausibility

In [14]:
# compute_plausibility_score is not defined in this notebook; presumably it
# comes from `from utils import *`.
plausibility = compute_plausibility_score(i_masks, i_scores, i_task)
print('plausibility: ', round(plausibility, 4))

plausibility:  0.9865


# DeepWalk, InfWalk, GAE, GraphSAGE 

In [15]:
#model:    "deepwalk", "infwalk", "gae", "sage"
#dataset:  "ring_of_cliques", "stochastic_block_model", "ba_cliques", "er_cliques"

In [16]:
# Run exp_baseline.py once (--runs=1) with the 'gae' model on the 'ring_of_cliques'
# dataset; the log below reports the saved files for each embedding size.
!python exp_baseline.py --runs=1 --model='gae' --dataset='ring_of_cliques'

ring_of_cliques_32_10 GAE 2 0 embeddings saved
ring_of_cliques_32_10 GAE 2 0 dim. metrics saved
ring_of_cliques_32_10 GAE 2 0 pos. metrics saved
ring_of_cliques_32_10 GAE 2 0 task metrics saved
ring_of_cliques_32_10 GAE 4 0 embeddings saved
ring_of_cliques_32_10 GAE 4 0 dim. metrics saved
ring_of_cliques_32_10 GAE 4 0 pos. metrics saved
ring_of_cliques_32_10 GAE 4 0 task metrics saved
ring_of_cliques_32_10 GAE 8 0 embeddings saved
ring_of_cliques_32_10 GAE 8 0 dim. metrics saved
ring_of_cliques_32_10 GAE 8 0 pos. metrics saved
ring_of_cliques_32_10 GAE 8 0 task metrics saved
ring_of_cliques_32_10 GAE 16 0 embeddings saved
ring_of_cliques_32_10 GAE 16 0 dim. metrics saved
ring_of_cliques_32_10 GAE 16 0 pos. metrics saved
ring_of_cliques_32_10 GAE 16 0 task metrics saved
ring_of_cliques_32_10 GAE 32 0 embeddings saved
ring_of_cliques_32_10 GAE 32 0 dim. metrics saved
ring_of_cliques_32_10 GAE 32 0 pos. metrics saved
ring_of_cliques_32_10 GAE 32 0 task metrics saved
ring_of_cliques_32_10 

In [17]:
# Embedding size of the GAE run to analyse; interpolated into the result file
# names loaded in the following cells.
# NOTE(review): the captured log above is cut off after size 32 — confirm that
# a run with this size was actually saved.
D_out = 128

In [18]:
# Open the '.scores.npz' and '.spath.npz' archives of GAE run 0 from
# linearshap_metrics/ (file names are selected by D_out).
# Keys read later in this section: 'f1_weighted', 'dim_ent_weighted',
# 'jaccard_score', 'edge_index' from emb_scores and 'sum' from emb_dist.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays; only point
# these paths at archives produced by your own runs.
emb_scores = np.load(f'../../output/synth/gae-featureless/linearshap_metrics/ring_of_cliques_32_10_GAE_{D_out}dims.0.scores.npz', allow_pickle=True)
emb_dist = np.load(f'../../output/synth/gae-featureless/linearshap_metrics/ring_of_cliques_32_10_GAE_{D_out}dims.0.spath.npz', allow_pickle=True)

In [19]:
# Load the saved GAE embedding array (Z) and the '.masks.npz' array of run 0;
# both are stored under the default 'arr_0' key of their archives.
# This rebinds Z and emb_masks from the previous section.
Z = np.load(f'../../output/synth/gae-featureless/ring_of_cliques_32_10_GAE_{D_out}dims.0.npz', allow_pickle=True)['arr_0']
emb_masks = np.load(f'../../output/synth/gae-featureless/linearshap_metrics/ring_of_cliques_32_10_GAE_{D_out}dims.0.masks.npz', allow_pickle=True)['arr_0']

In [20]:
# Load the three 'linear_*' archives of GAE run 0 from shap_metrics/. i_task and
# i_scores stay as opened archives, i_masks is the 'arr_0' array; all three are
# passed to compute_plausibility_score further down.
i_task = np.load(f'../../output/synth/gae-featureless/shap_metrics/ring_of_cliques_32_10_GAE_{D_out}dims.0.linear_task.npz', allow_pickle=True)
i_scores = np.load(f'../../output/synth/gae-featureless/shap_metrics/ring_of_cliques_32_10_GAE_{D_out}dims.0.linear_scores.npz', allow_pickle=True)
i_masks = np.load(f'../../output/synth/gae-featureless/shap_metrics/ring_of_cliques_32_10_GAE_{D_out}dims.0.linear_masks.npz', allow_pickle=True)['arr_0']

#### Comprehensibility and Sparsity

In [21]:
# Comprehensibility: maximum of 'f1_weighted' along axis 0, then the mean of those maxima.
best_f1 = emb_scores['f1_weighted'].max(axis=0)
print('comprehensibility: ', round(best_f1.mean(), 4))
# Sparsity: one minus the mean of 'dim_ent_weighted'.
mean_dim_entropy = emb_scores['dim_ent_weighted'].mean()
print('sparsity: ', round(1.-mean_dim_entropy, 4))

comprehensibility:  0.2677
sparsity:  0.1462


#### Overlap consistency

In [22]:
from scipy.stats import pearsonr
from scipy.spatial.distance import pdist, squareform
def compute_correlation(x,y):
    """Absolute Pearson correlation between two samples.

    Returns 0. when `x` has fewer than two entries. Otherwise returns |r|
    from scipy.stats.pearsonr(x, y), with NaN replaced by 0 via np.nan_to_num.

    NOTE(review): this helper is repeated verbatim in each section of the
    notebook; consider defining it once.
    """
    # Guard clause: too few observations to correlate.
    if x.shape[0] < 2:
        return 0.
    r_value = pearsonr(x, y)[0]
    return np.nan_to_num(np.abs(r_value))
        
# Absolute value of (1 - pdist 'correlation' distance) for every pair of columns
# of Z, with NaNs replaced by 0.
pairwise_dist = pdist(Z.T, 'correlation')
z_corr = np.nan_to_num(np.abs(1. - pairwise_dist))
# Keep only the pairs whose value is non-zero, in both z_corr and 'jaccard_score'.
nonzero_pairs = z_corr != 0.
overlap_consistency = compute_correlation(z_corr[nonzero_pairs], emb_scores['jaccard_score'][nonzero_pairs])
print('overlap consistency: ', round(overlap_consistency, 4))

overlap consistency:  0.1667


#### Positional Coherence

In [23]:
# Keep the rows of Z listed in 'edge_index' and the columns that have at least
# one mask entry above THRESH, then transpose so each row is one kept column.
# NOTE(review): this rebinds Z, so re-running the cell without reloading Z
# slices the already-reduced array.
edge_idx = emb_scores['edge_index']
dims, edges = np.nonzero(emb_masks > THRESH)
Z = Z[np.unique(edge_idx)][:, np.unique(dims)].T

In [24]:
# Positional coherence: mean |correlation| between each row of Z and the
# matching row of emb_dist['sum'], divided by the same quantity computed with
# randomly permuted row pairings (5 repetitions, THRESH added to the baseline).
dist_sum = emb_dist['sum']  # read once: an opened .npz re-reads the array on every key access
n_dims = Z.shape[0]
rng = np.random.default_rng(0)  # fixed seed so the printed value is reproducible
d_corr = np.array([compute_correlation(Z[d], dist_sum[d]) for d in range(n_dims)])
d_corr_rand = np.array([[compute_correlation(Z[d], dist_sum[j])
                         for d, j in zip(range(n_dims), rng.permutation(n_dims))]
                        for _ in range(5)])
print('positional coherence: ', round(np.mean(d_corr)/np.mean(d_corr_rand+THRESH), 4))

positional coherence:  0.9031


#### Plausibility

In [25]:
# compute_plausibility_score is not defined in this notebook; presumably it
# comes from `from utils import *`.
plausibility = compute_plausibility_score(i_masks, i_scores, i_task)
print('plausibility: ', round(plausibility, 4))

plausibility:  0.1608


# DeepWalk+DINE, GAE+DINE

In [26]:
#model:    "deepwalk", "gae"
#dataset:  "ring_of_cliques", "stochastic_block_model", "ba_cliques", "er_cliques"

In [27]:
# Run exp_dine.py once (--runs=1) with the 'gae' model on the 'ring_of_cliques'
# dataset; the log below reports the saved files for each input embedding size.
!python exp_dine.py --runs=1 --model='gae' --dataset='ring_of_cliques'

ring_of_cliques_32_10 GAE 8 128 0 dine embeddings saved
ring_of_cliques_32_10 GAE 8 128 0 dim. metrics saved
ring_of_cliques_32_10 GAE 8 128 0 pos. metrics saved
ring_of_cliques_32_10 GAE 8 128 0 task metrics saved
ring_of_cliques_32_10 GAE 16 128 0 dine embeddings saved
ring_of_cliques_32_10 GAE 16 128 0 dim. metrics saved
ring_of_cliques_32_10 GAE 16 128 0 pos. metrics saved
ring_of_cliques_32_10 GAE 16 128 0 task metrics saved
ring_of_cliques_32_10 GAE 32 128 0 dine embeddings saved
ring_of_cliques_32_10 GAE 32 128 0 dim. metrics saved
ring_of_cliques_32_10 GAE 32 128 0 pos. metrics saved
ring_of_cliques_32_10 GAE 32 128 0 task metrics saved
ring_of_cliques_32_10 GAE 64 128 0 dine embeddings saved
ring_of_cliques_32_10 GAE 64 128 0 dim. metrics saved
ring_of_cliques_32_10 GAE 64 128 0 pos. metrics saved
ring_of_cliques_32_10 GAE 64 128 0 task metrics saved
ring_of_cliques_32_10 GAE 128 128 0 dine embeddings saved
ring_of_cliques_32_10 GAE 128 128 0 dim. metrics saved
ring_of_cliques

In [28]:
# Sizes that select which GAE+DINE result files are loaded below: D_in fills the
# 'GAE_{D_in}dims' part of the file names and D_out the 'DINE_{D_out}dims' part.
D_in = D_out = 128

In [29]:
# Open the '.scores.npz' and '.spath.npz' archives of GAE+DINE run 0 from
# linearshap_metrics/ (file names are selected by D_in and D_out).
# Keys read later in this section: 'f1_weighted', 'dim_ent_weighted',
# 'jaccard_score', 'edge_index' from emb_scores and 'sum' from emb_dist.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays; only point
# these paths at archives produced by your own runs.
emb_scores = np.load(f'../../output/synth/gae+dine-featureless/linearshap_metrics/ring_of_cliques_32_10_GAE_{D_in}dims.DINE_{D_out}dims.0.scores.npz', allow_pickle=True)
emb_dist = np.load(f'../../output/synth/gae+dine-featureless/linearshap_metrics/ring_of_cliques_32_10_GAE_{D_in}dims.DINE_{D_out}dims.0.spath.npz', allow_pickle=True)

In [30]:
# Load the saved GAE+DINE embedding array (Z) and the '.masks.npz' array of run 0;
# both are stored under the default 'arr_0' key of their archives.
# This rebinds Z and emb_masks from the previous section.
Z = np.load(f'../../output/synth/gae+dine-featureless/ring_of_cliques_32_10_GAE_{D_in}dims.DINE_{D_out}dims.0.npz', allow_pickle=True)['arr_0']
emb_masks = np.load(f'../../output/synth/gae+dine-featureless/linearshap_metrics/ring_of_cliques_32_10_GAE_{D_in}dims.DINE_{D_out}dims.0.masks.npz', allow_pickle=True)['arr_0']

In [31]:
# Load the three 'linear_*' archives of GAE+DINE run 0 from shap_metrics/. i_task
# and i_scores stay as opened archives, i_masks is the 'arr_0' array; all three
# are passed to compute_plausibility_score further down.
i_task = np.load(f'../../output/synth/gae+dine-featureless/shap_metrics/ring_of_cliques_32_10_GAE_{D_in}dims.DINE_{D_out}dims.0.linear_task.npz', allow_pickle=True)
i_scores = np.load(f'../../output/synth/gae+dine-featureless/shap_metrics/ring_of_cliques_32_10_GAE_{D_in}dims.DINE_{D_out}dims.0.linear_scores.npz', allow_pickle=True)
i_masks = np.load(f'../../output/synth/gae+dine-featureless/shap_metrics/ring_of_cliques_32_10_GAE_{D_in}dims.DINE_{D_out}dims.0.linear_masks.npz', allow_pickle=True)['arr_0']

#### Comprehensibility and Sparsity

In [32]:
# Comprehensibility: maximum of 'f1_weighted' along axis 0, then the mean of those maxima.
best_f1 = emb_scores['f1_weighted'].max(axis=0)
print('comprehensibility: ', round(best_f1.mean(), 4))
# Sparsity: one minus the mean of 'dim_ent_weighted'.
mean_dim_entropy = emb_scores['dim_ent_weighted'].mean()
print('sparsity: ', round(1.-mean_dim_entropy, 4))

comprehensibility:  0.5572
sparsity:  0.3044


#### Overlap consistency

In [33]:
from scipy.stats import pearsonr
from scipy.spatial.distance import pdist, squareform
def compute_correlation(x,y):
    """Absolute Pearson correlation between two samples.

    Returns 0. when `x` has fewer than two entries. Otherwise returns |r|
    from scipy.stats.pearsonr(x, y), with NaN replaced by 0 via np.nan_to_num.

    NOTE(review): this helper is repeated verbatim in each section of the
    notebook; consider defining it once.
    """
    # Guard clause: too few observations to correlate.
    if x.shape[0] < 2:
        return 0.
    r_value = pearsonr(x, y)[0]
    return np.nan_to_num(np.abs(r_value))
        
# Absolute value of (1 - pdist 'correlation' distance) for every pair of columns
# of Z, with NaNs replaced by 0.
pairwise_dist = pdist(Z.T, 'correlation')
z_corr = np.nan_to_num(np.abs(1. - pairwise_dist))
# Keep only the pairs whose value is non-zero, in both z_corr and 'jaccard_score'.
nonzero_pairs = z_corr != 0.
overlap_consistency = compute_correlation(z_corr[nonzero_pairs], emb_scores['jaccard_score'][nonzero_pairs])
print('overlap consistency: ', round(overlap_consistency, 4))

overlap consistency:  0.4231


#### Positional Coherence

In [34]:
# Keep the rows of Z listed in 'edge_index' and the columns that have at least
# one mask entry above THRESH, then transpose so each row is one kept column.
# NOTE(review): this rebinds Z, so re-running the cell without reloading Z
# slices the already-reduced array.
edge_idx = emb_scores['edge_index']
dims, edges = np.nonzero(emb_masks > THRESH)
Z = Z[np.unique(edge_idx)][:, np.unique(dims)].T

In [35]:
# Positional coherence: mean |correlation| between each row of Z and the
# matching row of emb_dist['sum'], divided by the same quantity computed with
# randomly permuted row pairings (5 repetitions, THRESH added to the baseline).
dist_sum = emb_dist['sum']  # read once: an opened .npz re-reads the array on every key access
n_dims = Z.shape[0]
rng = np.random.default_rng(0)  # fixed seed so the printed value is reproducible
d_corr = np.array([compute_correlation(Z[d], dist_sum[d]) for d in range(n_dims)])
d_corr_rand = np.array([[compute_correlation(Z[d], dist_sum[j])
                         for d, j in zip(range(n_dims), rng.permutation(n_dims))]
                        for _ in range(5)])
print('positional coherence: ', round(np.mean(d_corr)/np.mean(d_corr_rand+THRESH), 4))

positional coherence:  4.1403


#### Plausibility

In [36]:
# compute_plausibility_score is not defined in this notebook; presumably it
# comes from `from utils import *`.
plausibility = compute_plausibility_score(i_masks, i_scores, i_task)
print('plausibility: ', round(plausibility, 4))

plausibility:  0.5188
