In [1]:
import networkcommons as nc
import decoupler as dc
import pandas as pd
import networkx as nx

  from .autonotebook import tqdm as notebook_tqdm


We first import the network from OmniPath and check that it contains no unsigned interactions or self-loops.

In [2]:
# Load the prior-knowledge meta network from its SIF file.
meta_network = nc.read_network_from_file("../data/moon/meta_network.sif")

In [3]:
# Edge count straight after loading.
# NOTE(review): the graph holds 884 fewer edges than expected and the cause
# has not been established — TODO investigate (possibly repeated
# source/target pairs collapsing into a single edge; unconfirmed).
len(meta_network.edges())

82673

In [4]:
# Clean the network in place of the loaded one. Per the notebook text this is
# the check for unsigned interactions and self-loops (behaviour of the helper
# itself not visible here — TODO confirm).
meta_network = nc.meta_network_cleanup(meta_network)

In [5]:
# Edge count after cleanup, to see how many edges the step removed.
len(meta_network.edges)

82672

We read inputs:

In [6]:
def _load_value_column(tsv_path):
    """Read a tab-separated table and return its ``value`` column as a dict.

    The first column of the file becomes the index, so the returned dict maps
    each index label to the corresponding entry of the ``value`` column.
    """
    table = pd.read_csv(tsv_path, sep="\t", header=0, index_col=0)
    return table["value"].to_dict()


# The three COSMOS inputs share one file layout, so they go through one loader.
sig_input = _load_value_column("../data/moon/cosmos_sig_input.tsv")
rna_input = _load_value_column("../data/moon/cosmos_rna_input.tsv")
metab_input = _load_value_column("../data/moon/cosmos_metab_input.tsv")

For the metabolites, we add the code of the compartment in which each one is located.

In [7]:
# Attach compartment codes to the metabolite inputs. "c" and "m" are the
# compartments requested (presumably cytosol and mitochondria — TODO confirm).
metab_input = nc.prepare_metab_inputs(metab_input, ["c", "m"]) # result matches the R implementation

Adding compartment codes.


In [8]:
# Remove PKN nodes for genes that are not among the keys of rna_input,
# i.e. genes treated as unexpressed.
meta_network = nc.filter_pkn_expressed_genes(rna_input.keys(), meta_network)

MOON: removing unexpressed nodes from PKN...
MOON: 13092 nodes removed


In [9]:
# Edge count after the expression filter.
len(meta_network.edges) # the R implementation gives 37740 at this point

38141

We filter out those inputs that cannot be mapped to the prior knowledge network

In [10]:
# Drop sig_input entries whose nodes are no longer present in the PKN.
sig_input = nc.filter_input_nodes_not_in_pkn(sig_input, meta_network) # result matches the R implementation


COSMOS: 17 input/measured nodes are not inPKN anymore: ['AR', 'CEBPA', 'ESR1', 'FOS', 'FOXA1', 'FOXP1', 'GATA2', 'GATA3', 'HNF4A', 'MITF', 'PRDM14', 'SOX2', 'SPI1', 'TAL1', 'TFAP2C', 'VDR', 'ZNF263']


Now, we prune the network to remove nodes that cannot be reached from either the sources or the targets.

In [11]:
# Edge count before pruning, as a baseline for the next step.
len(meta_network.edges)

38141

In [12]:
# Prune relative to the sig_input nodes. Presumably this keeps only nodes
# reachable downstream of them within a step limit — assumed from the function
# name, TODO confirm the default number of steps.
meta_network = nc.keep_controllable_neighbours(sig_input, meta_network)  # result matches the R implementation

In [13]:
# Edge count after pruning around the sig_input nodes.
len(meta_network.edges) # the R implementation gives 23117 when run with n_steps 20

23657

In [14]:
# Drop metabolite inputs whose nodes did not survive the pruning of the PKN.
metab_input = nc.filter_input_nodes_not_in_pkn(metab_input, meta_network) # result matches the R implementation

COSMOS: 154 input/measured nodes are not inPKN anymore: ['Metab__HMDB0011747_c', 'Metab__HMDB0000755_c', 'Metab__HMDB0000355_c', 'Metab__HMDB0000479_c', 'Metab__HMDB0000807_c', 'Metab__HMDB0003464_c', 'Metab__HMDB0000076_c', 'Metab__HMDB0003701_c', 'Metab__HMDB0001494_c', 'Metab__HMDB0000462_c', 'Metab__HMDB0001548_c', 'Metab__HMDB0001893_c', 'Metab__HMDB0000646_c', 'Metab__HMDB0001352_c', 'Metab__HMDB0000902_c', 'Metab__HMDB0000030_c', 'Metab__HMDB0001847_c', 'Metab__HMDB0000062_c', 'Metab__HMDB0000562_c', 'Metab__HMDB0001151_c', 'Metab__HMDB0000283_c', 'Metab__HMDB0000622_c', 'Metab__HMDB0011737_c', 'Metab__HMDB0011741_c', 'Metab__HMDB0000625_c', 'Metab__HMDB0003466_c', 'Metab__HMDB0000130_c', 'Metab__HMDB0000965_c', 'Metab__HMDB0000213_c', 'Metab__HMDB0000715_c', 'Metab__HMDB0004041_c', 'Metab__HMDB0000086_c', 'Metab__HMDB0002320_c', 'Metab__HMDB0000691_c', 'Metab__HMDB0000169_c', 'Metab__HMDB0001078_c', 'Metab__HMDB0001389_c', 'Metab__HMDB0000853_c', 'Metab__HMDB0031018_c', 'Metab_

In [15]:
# Prune relative to the metabolite inputs. Presumably this keeps only nodes
# from which those inputs can be reached — assumed from the function name,
# TODO confirm.
meta_network = nc.keep_observable_neighbours(metab_input, meta_network)

In [16]:
# Edge count after pruning around the metabolite inputs.
len(meta_network.edges) # the R implementation gives 18715 when run with n_steps 20

19150

In [17]:
# Filter sig_input a second time: the pruning steps since the first filter
# have removed further nodes from the PKN.
sig_input = nc.filter_input_nodes_not_in_pkn(sig_input, meta_network) # result matches the R implementation

COSMOS: 7 input/measured nodes are not inPKN anymore: ['CTCF', 'ELK1', 'EPAS1', 'ETS1', 'SREBF1', 'TFAP2A', 'USF1']


In [18]:
# Compress the network. Besides the compressed graph, the call returns
# `signatures` and `dup_parents`, which decompress_moon_result needs later on.
# NOTE(review): "R is 12482" presumably refers to the edge count of the
# compressed network — TODO confirm.
meta_network_compressed, signatures, dup_parents = nc.compress_same_children(meta_network, sig_input, metab_input) # R is 12482

In [19]:
# Preview the first 62 edges of the compressed network, ordered by source and
# then by target, for comparison with the R output (reported to match, but
# not verified).
(
    nx.to_pandas_edgelist(meta_network_compressed)
    .sort_values(by=["source", "target"])
    .head(62)
)

Unnamed: 0,source,target,sign
47,AAK1,AP2M1,1
48,AAK1,NUMB,1
2272,ABCC1,parent_of_Metab__HMDB0000121_e1,1
2271,ABCC1,parent_of_Metab__mlthf_e1,1
2269,ABCC1,parent_of_Metab__sphmyln_hs_e1,1
...,...,...,...
9408,ABL1,YAP1,1
9438,ABL1,parent_of_ACTR21_____ACTB1,1
9439,ABL1,parent_of_CCND11,1
9441,ABL1,parent_of_MAPK81,1


In [20]:
# Number of signatures returned by the compression step.
len(signatures) # the R implementation gives 8262

8516

In [21]:
# Show the mapping from original node names to their "parent_of_..." nodes.
dup_parents # reported to match the R output (not verified)

{'Gene8363__ACAD9': 'parent_of_Metab__HMDB0001197_m1_____Metab__M00069_m1',
 'Gene4586__ACSL4': 'parent_of_Metab__HMDB0006267_r1',
 'Gene4789__KMT5C': 'parent_of_Metab__Ndmelys_c1_____Metab__HMDB0000939_c1',
 'Gene2527__HADHA': 'parent_of_Metab__CE4804_m1',
 'Gene263900003__SLC3A2_SLC7A6': 'parent_of_Metab__HMDB0000517_e1',
 'Gene40100002__SLC25A19': 'parent_of_Metab__HMDB0001000_m1',
 'Gene7304__PGM1_reverse': 'parent_of_Metab__HMDB0001489_c1',
 'Gene195000002__SLC7A6': 'parent_of_Metab__HMDB0000167_c1',
 'Gene272700003__SLC1A4': 'parent_of_Metab__HMDB0000725_e1',
 'Gene7284__MIA3': 'parent_of_Metab__HMDB0001270_c1',
 'Gene9291__NME6': 'parent_of_Metab__HMDB0001532_m1',
 'Gene167000002__SLC25A10': 'parent_of_Metab__HMDB0000134_m1',
 'Gene1204__INPP5B': 'parent_of_Metab__pail34p_hs_c1',
 'Gene456600002__SLC25A29': 'parent_of_Metab__tmndnccrn_m1',
 'Gene2912__SGMS1': 'parent_of_Metab__dag_hs_c1',
 'Gene4628__HSD17B10': 'parent_of_Metab__M00911_m1',
 'Gene1487__CMPK1': 'parent_of_Metab__

In [22]:
# Run the same cleanup helper on the compressed network.
meta_network_compressed = nc.meta_network_cleanup(meta_network_compressed)

In [23]:
# Edge count of the compressed network after cleanup.
len(meta_network_compressed.edges) # the R implementation gives 12474

12755

In [24]:
# Fetch the DoRothEA TF-target table restricted to confidence levels A and B,
# then display it.
tf_regn = dc.get_dorothea(levels=["A", "B"])
tf_regn

Unnamed: 0,source,confidence,target,weight
0,MYC,A,TERT,1.0
1,LEF1,A,CCL7,1.0
2,LEF1,A,BIRC5,1.0
3,KLF6,A,LTC4S,1.0
4,KLF5,A,NOTCH1,1.0
...,...,...,...,...
15108,FOS,B,CREB5,0.5
15109,FOS,B,CRIM1,0.5
15110,FOS,B,CRYBG1,0.5
15111,FOS,B,CDK6,0.5


In [25]:
# First MOON scoring pass on the compressed network.
# NOTE(review): n_layers=20 and statistic='wmean' are repeated verbatim in the
# convergence loop further down; keep the two call sites in sync.
moon_res = nc.run_moon_core(sig_input, metab_input, meta_network_compressed, n_layers=20, statistic='wmean')

Iteration count: 1
Iteration count: 2
Iteration count: 3
Iteration count: 4
Iteration count: 5
Iteration count: 6
Iteration count: 7
Iteration count: 8
Iteration count: 9
Iteration count: 10
Iteration count: 11
Iteration count: 12
Iteration count: 13
Iteration count: 14
Iteration count: 15
Iteration count: 16
Iteration count: 17
Iteration count: 18
Iteration count: 19
Iteration count: 20


In [26]:
# Display the MOON scores (columns: source, score, level).
moon_res # the R implementation gives 4925 (presumably rows — TODO confirm)

Unnamed: 0,source,score,level
0,Gene1026__MTR,0.348652,1
1,Gene10490__GPT2,1.401611,1
2,Gene10490__GPT2_reverse,-0.805149,1
3,Gene109__SLC29A1,0.425223,1
4,Gene114__ADSL,-0.545089,1
...,...,...,...
5041,Metab__HMDB0000167_m,-0.604629,0
5042,Metab__HMDB0000725_m,0.158506,0
5043,Metab__HMDB0000158_m,-0.137388,0
5044,Metab__HMDB0000294_m,0.822585,0


In [27]:
# Filter the compressed network using the MOON scores, the TF regulons and the
# RNA input. The result is kept under a separate name; the convergence loop
# further down starts again from meta_network_compressed, not from this object.
meta_network_compressed_moon = nc.filter_incoherent_TF_target(moon_res, tf_regn, meta_network_compressed, rna_input)

In [28]:
# Show the filtered network as an edge table.
nx.to_pandas_edgelist(meta_network_compressed_moon)

Unnamed: 0,source,target,sign
0,Gene6811__orphanReacMEV_Rt,Metab__HMDB0000227_e,1
1,Metab__HMDB0000227_e,parent_of_Metab__HMDB0000227_c1,1
2,Gene3546__orphanReacTRPPHEr_reverse,Metab__trpphe_c,1
3,Metab__trpphe_c,Gene3546__orphanReacTRPPHEr,1
4,Gene1362__orphanReacSPHMYLNtg,Metab__sphmyln_hs_g,1
...,...,...,...
12707,Metab__HMDB0003073_c,Gene9877__orphanReacr1299,1
12708,Metab__HMDB0003073_c,parent_of_Metab__HMDB0006368_c1,1
12709,Metab__M02976_m,parent_of_Metab__M01141_m1,1
12710,Metab__HMDB0000929_e,parent_of_Metab__HMDB0000929_c1,1


In [29]:
# Edge count of the uncompressed network (meta_network), not of the
# compressed or filtered one; no step since the pruning has reassigned it.
len(meta_network.edges)

19150

In [30]:
# Repeat MOON scoring followed by removal of incoherent TF-target edges until
# the compressed network stops losing edges, or until the attempt cap is hit.
# Note: this cell overwrites meta_network_compressed and moon_res.
max_attempts = 10  # safety cap so the loop always terminates
before = 1  # sentinels that differ, so the loop body runs at least once
after = 0
i = 0

while before != after and i < max_attempts:
    before = len(meta_network_compressed.edges)
    # Re-score on the current network, then filter it with the fresh scores.
    moon_res = nc.run_moon_core(sig_input, metab_input, meta_network_compressed, n_layers=20, statistic='wmean')
    meta_network_compressed = nc.filter_incoherent_TF_target(moon_res, tf_regn, meta_network_compressed, rna_input)
    after = len(meta_network_compressed.edges)
    i += 1
    print(f'Attempt {i} - Before: {before}, After: {after}')

# The loop used to end silently when the cap was reached, leaving a network
# that was still shrinking; make that situation visible to the reader.
if before != after:
    print(f'Warning: edge count still changing after {i} attempts (Before: {before}, After: {after})')

Iteration count: 1
Iteration count: 2
Iteration count: 3
Iteration count: 4
Iteration count: 5
Iteration count: 6
Iteration count: 7
Iteration count: 8
Iteration count: 9
Iteration count: 10
Iteration count: 11
Iteration count: 12
Iteration count: 13
Iteration count: 14
Iteration count: 15
Iteration count: 16
Iteration count: 17
Iteration count: 18
Iteration count: 19
Iteration count: 20
Attempt 1 - Before: 12755, After: 12712
Iteration count: 1
Iteration count: 2
Iteration count: 3
Iteration count: 4
Iteration count: 5
Iteration count: 6
Iteration count: 7
Iteration count: 8
Iteration count: 9
Iteration count: 10
Iteration count: 11
Iteration count: 12
Iteration count: 13
Iteration count: 14
Iteration count: 15
Iteration count: 16
Iteration count: 17
Iteration count: 18
Iteration count: 19
Iteration count: 20
Attempt 2 - Before: 12712, After: 12712


In [31]:
# Map the MOON scores back from compressed nodes to the original nodes, using
# the signatures and dup_parents produced by compress_same_children
# (presumed from the argument names and the source_original output column).
decompressed_network = nc.decompress_moon_result(moon_res, signatures, dup_parents, meta_network_compressed)

In [32]:
# Display the decompressed scores (columns: source, score, level, source_original).
decompressed_network # the R implementation gives 8255 (presumably rows — TODO confirm)

Unnamed: 0,source,score,level,source_original
0,AAK1,-0.074214,5.0,AAK1
1,ABCC1,-0.383142,6.0,ABCC1
2,ABI1,-0.145249,6.0,ABI1
3,ABL1,0.273452,4.0,ABL1
4,ABL2,-0.194041,5.0,ABL2
...,...,...,...,...
8520,parent_of_TUBB1,-0.292769,6.0,DPYSL2
8521,parent_of_VASP1,0.837532,7.0,LPP
8522,parent_of_VASP1,0.837532,7.0,ZYX
8523,parent_of_VCP1,-0.423533,6.0,AMFR


In [33]:
import numpy as np

In [34]:
# Reduce the decompressed MOON result to a solution network (res_network) and
# a node attribute table (att).
# NOTE(review): the bare literal 1 is presumably the score cutoff — TODO
# confirm and give it a name. The uncompressed meta_network is passed here,
# not meta_network_compressed.
res_network, att = nc.reduce_solution_network(decompressed_network, meta_network, 1, sig_input, rna_input)

In [35]:
# Build a lookup from HMDB identifier to metabolite name from the mapper table.
mapping_dict = (
    pd.read_csv("../data/moon/hmdb_mapper_vec.tsv", sep="\t", header=0)
    .set_index("hmdb_id")["name"]
    .to_dict()
)

In [36]:
# Translate the result network and its attribute table with mapping_dict
# (hmdb_id -> name), presumably to make node labels human-readable.
translated_network, att_translated = nc.translate_res(res_network, att, mapping_dict)

In [40]:
# Display the translated node attribute table.
att_translated

Unnamed: 0,score,level,nodes,real_score
0,1.401611,2.0,CAD,0.270492
1,2.249355,3.0,CAV1,0.621135
2,1.086776,4.0,DUSP1,-0.330174
3,1.401611,3.0,Enzyme1346__ESD,
4,1.401611,5.0,Enzyme13__orphanReac10FTHF7GLUtl,
...,...,...,...,...
277,1.401611,5.0,Enzyme961__ADH5,
278,1.401611,5.0,Enzyme1755__ADH5,
279,1.401611,5.0,Enzyme657__ADH5,
280,1.401611,5.0,Enzyme9174__orphanReacLEUKTRD4tr_reverse,
