# Permute Hetnets for Interpreting Compressed Latent Spaces

Modified from @dhimmel - https://github.com/dhimmel/integrate/blob/master/permute.ipynb

Generate several randomly permuted hetnets to serve as a null distribution. The permutations preserve node degree but randomize the connections between nodes. See [Himmelstein et al. 2017](https://doi.org/10.7554/eLife.26726) for more details.

In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import pandas as pd

import hetio.readwrite
import hetio.permute

In [2]:
%%time
# Path to the input hetnet, relative to the current working directory
hetnet_path = os.path.join('hetnets', 'interpret_hetnet.json.bz2')
# `graph` is the unpermuted hetnet; the permutation loop starts from this object
graph = hetio.readwrite.read_graph(hetnet_path)

CPU times: user 6.31 s, sys: 240 ms, total: 6.55 s
Wall time: 6.55 s


In [3]:
# Selected as a result of `scripts/evaluate-permutations.ipynb`
# Number of permuted hetnets to generate (one per iteration of the permutation loop)
num_permuted_hetnets = 10
# Passed as the `multiplier` argument of `hetio.permute.permute_graph`
# (presumably scales the number of attempted edge swaps -- confirm against hetio)
num_swaps = 4

In [4]:
%%time
# Generate `num_permuted_hetnets` permuted hetnets, write each one to disk, and
# collect the per-permutation statistics returned by `permute_graph`.
stat_dfs = list()
permuted_graph = graph

# Create the output directory once, before any graph is written. `exist_ok=True`
# makes the cell safe to re-run when the directory is already present.
permuted_dir = os.path.join('hetnets', 'permuted')
os.makedirs(permuted_dir, exist_ok=True)

# Permutations are numbered from 1; the number doubles as the random seed and
# as the suffix of the output file name.
for i in range(1, num_permuted_hetnets + 1):
    print('Starting permutation', i)
    # Each permutation starts from the previous permuted graph, not from the
    # original `graph`, because `permuted_graph` is reassigned on every pass.
    permuted_graph, stats = hetio.permute.permute_graph(permuted_graph,
                                                        multiplier=num_swaps,
                                                        seed=i)
    # Tag the statistics with the permutation number so the tables can be
    # told apart after they are concatenated.
    stat_df = pd.DataFrame(stats)
    stat_df['permutation'] = i
    stat_dfs.append(stat_df)

    perm_path = os.path.join(permuted_dir, 'interpret_hetnet_perm-{}.json.bz2'.format(i))
    hetio.readwrite.write_graph(permuted_graph, perm_path)

Starting permutation 1
Starting permutation 2
Starting permutation 3
Starting permutation 4
Starting permutation 5
Starting permutation 6
Starting permutation 7
Starting permutation 8
Starting permutation 9
Starting permutation 10
CPU times: user 5min 37s, sys: 739 ms, total: 5min 37s
Wall time: 5min 37s


In [5]:
# Combine the per-permutation statistics into one table and write it out as a
# tab-separated file next to the permuted hetnets.
stat_path = os.path.join('hetnets', 'permuted', 'stats.tsv')
stat_df = pd.concat(stat_dfs)
stat_df.to_csv(
    path_or_buf=stat_path,
    sep='\t',
    float_format='%.5g',
    index=False,
)

In [6]:
# Display the concatenated permutation statistics (all permutations in one table)
stat_df

Unnamed: 0,cumulative_attempts,attempts,complete,unchanged,same_edge,self_loop,duplicate,undirected_duplicate,excluded,metaedge,abbrev,permutation
0,1963,1964,0.100020,0.527602,0.000000,0.0,0.179735,0.0,0.0,Gene - participates - Computational-Gene-Sets-CGN,GpC4CGN,1
1,3926,1963,0.199990,0.303932,0.000509,0.0,0.167601,0.0,0.0,Gene - participates - Computational-Gene-Sets-CGN,GpC4CGN,1
2,5889,1963,0.299959,0.196985,0.000000,0.0,0.170148,0.0,0.0,Gene - participates - Computational-Gene-Sets-CGN,GpC4CGN,1
3,7852,1963,0.399929,0.146058,0.000000,0.0,0.170657,0.0,0.0,Gene - participates - Computational-Gene-Sets-CGN,GpC4CGN,1
4,9815,1963,0.499898,0.119373,0.000000,0.0,0.175242,0.0,0.0,Gene - participates - Computational-Gene-Sets-CGN,GpC4CGN,1
...,...,...,...,...,...,...,...,...,...,...,...,...
147,4956,708,0.699746,0.049125,0.002825,0.0,0.076271,0.0,0.0,Gene - participates - xCell-Cell-Type,GpXCELL,10
148,5664,708,0.799689,0.047995,0.000000,0.0,0.084746,0.0,0.0,Gene - participates - xCell-Cell-Type,GpXCELL,10
149,6372,708,0.899633,0.047431,0.002825,0.0,0.088983,0.0,0.0,Gene - participates - xCell-Cell-Type,GpXCELL,10
150,7080,708,0.999577,0.039526,0.002825,0.0,0.077684,0.0,0.0,Gene - participates - xCell-Cell-Type,GpXCELL,10
