In [1]:
import networkcommons as nc
import decoupler as dc
import pandas as pd
import networkx as nx

  from .autonotebook import tqdm as notebook_tqdm


We first import the network from OmniPath, and check it does not contain unsigned interactions or self loops

In [59]:
meta_network = nc.read_network_from_file("../data/moon/meta_network.sif")
meta_network = nc.meta_network_cleanup(meta_network)

In [60]:
len(meta_network.edges)

82672

We read inputs:

In [61]:
# Each input is a tab-separated table: the first column becomes the index
# (the node identifier) and the "value" column is turned into an
# {identifier: value} dictionary.
sig_input = pd.read_csv(
    "../data/moon/cosmos_sig_input.tsv", sep="\t", header=0, index_col=0
)["value"].to_dict()
rna_input = pd.read_csv(
    "../data/moon/cosmos_rna_input.tsv", sep="\t", header=0, index_col=0
)["value"].to_dict()
metab_input = pd.read_csv(
    "../data/moon/cosmos_metab_input.tsv", sep="\t", header=0, index_col=0
)["value"].to_dict()

For the metabolites, we add the compartment in which each one is located.

In [62]:
metab_input = nc.prepare_metab_inputs(metab_input, ["c", "m"]) # same as R

Adding compartment codes.


In [63]:
metab_input

{'Metab__HMDB0011747_c': 0.4019914211816579,
 'Metab__HMDB0000755_c': -0.1427176460700605,
 'Metab__HMDB0000905_c': -1.2441833464227905,
 'Metab__HMDB0000012_c': -0.9672066427839662,
 'Metab__HMDB0001191_c': -0.6310351789016082,
 'Metab__HMDB0001294_c': -0.5125025754505299,
 'Metab__HMDB0000355_c': -0.4920511282256854,
 'Metab__HMDB0000479_c': 0.9025297218841531,
 'Metab__HMDB0000807_c': 0.1123336349264507,
 'Metab__HMDB0000272_c': 1.149882807888093,
 'Metab__HMDB0003464_c': 0.1735796353080372,
 'Metab__HMDB0000267_c': 0.4881789430791288,
 'Metab__HMDB0001173_c': -1.7030122889661738,
 'Metab__HMDB0000076_c': -0.7267522422253924,
 'Metab__HMDB0003701_c': -0.3452798546689682,
 'Metab__HMDB0001316_c': -0.1481290815558073,
 'Metab__HMDB0001494_c': -0.7340366191925246,
 'Metab__HMDB0000034_c': -0.3321618212438423,
 'Metab__HMDB0000050_c': 0.4252225886284037,
 'Metab__HMDB0000161_c': -1.1831454050609964,
 'Metab__HMDB0000462_c': -0.100266876757015,
 'Metab__HMDB0001548_c': 1.6979258049603223

In [64]:
# Keep the returned graph explicitly instead of relying on the call's in-place
# side effect, in line with the other filtering steps of this notebook, which
# all assign their result.
meta_network = nc.filter_pkn_expressed_genes(rna_input.keys(), meta_network)

MOON: removing unexpressed nodes from PKN...
MOON: 13092 nodes removed


<networkx.classes.digraph.DiGraph at 0x7fcfdfb424a0>

In [65]:
len(meta_network.edges) # R is 37740

38141

We filter out those inputs that cannot be mapped to the prior knowledge network

In [66]:
sig_input = nc.filter_input_nodes_not_in_pkn(sig_input, meta_network) # same as R


COSMOS: 17 input/measured nodes are not inPKN anymore: ['AR', 'CEBPA', 'ESR1', 'FOS', 'FOXA1', 'FOXP1', 'GATA2', 'GATA3', 'HNF4A', 'MITF', 'PRDM14', 'SOX2', 'SPI1', 'TAL1', 'TFAP2C', 'VDR', 'ZNF263']


Now, we prune the network to remove nodes that can be reached neither from the sources nor from the targets.

In [67]:
len(meta_network.edges)

38141

In [68]:
meta_network = nc.keep_controllable_neighbours(sig_input, meta_network)  # same as R

In [69]:
len(meta_network.edges) # R is 23117, with n_steps 20

23657

In [70]:
metab_input = nc.filter_input_nodes_not_in_pkn(metab_input, meta_network) # Same as R

COSMOS: 154 input/measured nodes are not inPKN anymore: ['Metab__HMDB0011747_c', 'Metab__HMDB0000755_c', 'Metab__HMDB0000355_c', 'Metab__HMDB0000479_c', 'Metab__HMDB0000807_c', 'Metab__HMDB0003464_c', 'Metab__HMDB0000076_c', 'Metab__HMDB0003701_c', 'Metab__HMDB0001494_c', 'Metab__HMDB0000462_c', 'Metab__HMDB0001548_c', 'Metab__HMDB0001893_c', 'Metab__HMDB0000646_c', 'Metab__HMDB0001352_c', 'Metab__HMDB0000902_c', 'Metab__HMDB0000030_c', 'Metab__HMDB0001847_c', 'Metab__HMDB0000062_c', 'Metab__HMDB0000562_c', 'Metab__HMDB0001151_c', 'Metab__HMDB0000283_c', 'Metab__HMDB0000622_c', 'Metab__HMDB0011737_c', 'Metab__HMDB0011741_c', 'Metab__HMDB0000625_c', 'Metab__HMDB0003466_c', 'Metab__HMDB0000130_c', 'Metab__HMDB0000965_c', 'Metab__HMDB0000213_c', 'Metab__HMDB0000715_c', 'Metab__HMDB0004041_c', 'Metab__HMDB0000086_c', 'Metab__HMDB0002320_c', 'Metab__HMDB0000691_c', 'Metab__HMDB0000169_c', 'Metab__HMDB0001078_c', 'Metab__HMDB0001389_c', 'Metab__HMDB0000853_c', 'Metab__HMDB0031018_c', 'Metab_

In [71]:
meta_network = nc.keep_observable_neighbours(metab_input, meta_network)

In [72]:
len(meta_network.edges) # R is 18715, with n_steps 20

19150

In [73]:
sig_input = nc.filter_input_nodes_not_in_pkn(sig_input, meta_network) # same as R

COSMOS: 7 input/measured nodes are not inPKN anymore: ['CTCF', 'ELK1', 'EPAS1', 'ETS1', 'SREBF1', 'TFAP2A', 'USF1']


In [74]:
meta_network_compressed, signatures, dup_parents = nc.compress_same_children(meta_network, sig_input, metab_input)

In [81]:
meta_network_compressed.edges['ABCC1', 'parent_of_Metab__HMDB0000121_e1'] # allegedly same as R

{'sign': 1}

In [82]:
signatures # allegedly same as R

{'ANAPC2': 'parent_of_CDC261_____ANAPC111_____ANAPC11_____CDC231_____CDC271_____CDC161_____ANAPC41_____ANAPC101_____DVL1-1_____FZR11_____ANAPC71_____ANAPC51',
 'Gene7932__SCD': 'parent_of_Metab__M00127_c1',
 'Metab__phetrpleu_c': 'parent_of_Gene7354__orphanReacPHETRPLEUr1',
 'Gene3422__orphanReacALAARGCYSr_reverse': 'parent_of_Metab__alaargcys_c1',
 'Gene9857__orphanReacr1109': 'parent_of_Metab__HMDB0000223_m1',
 'Gene6537__ECI1': 'parent_of_Metab__2docopencoa_m1',
 'Gene4558__CPT1B': 'parent_of_Metab__vacccrn_c1',
 'Metab__valserarg_c': 'parent_of_Gene7432__orphanReacVALSERARGr1',
 'Metab__cs_d_deg2_l': 'parent_of_Gene9481__orphanReacS2TASE4ly1',
 'Gene685800001__orphanReacC05957t': 'parent_of_Metab__HMDB0000190_c1',
 'Gene9415__PI4K2A': 'parent_of_Metab__pail45p_hs_c1',
 'Gene9266__NAGA': 'parent_of_Metab__Ser_Thr_l1_____Metab__udpacgal_l1',
 'Gene1005600002__SLC7A6': 'parent_of_Metab__HMDB0000161_e1',
 'Gene997700002__SLC7A6': 'parent_of_Metab__HMDB0000123_c1',
 'Gene191400002__SLC7

In [83]:
len(signatures) # R is 8262

8516

In [84]:
dup_parents # allegedly same as R

{'Gene7932__SCD': 'parent_of_Metab__M00127_c1',
 'Gene9857__orphanReacr1109': 'parent_of_Metab__HMDB0000223_m1',
 'Gene4558__CPT1B': 'parent_of_Metab__vacccrn_c1',
 'Gene685800001__orphanReacC05957t': 'parent_of_Metab__HMDB0000190_c1',
 'Gene9415__PI4K2A': 'parent_of_Metab__pail45p_hs_c1',
 'Gene1005600002__SLC7A6': 'parent_of_Metab__HMDB0000161_e1',
 'Gene997700002__SLC7A6': 'parent_of_Metab__HMDB0000123_c1',
 'Gene191400002__SLC7A6': 'parent_of_Metab__HMDB0000574_c1',
 'Gene10515__LPIN2': 'parent_of_Metab__HMDB0000252_c1',
 'Gene1747__orphanReacPPCOAtm_reverse': 'parent_of_Metab__HMDB0001275_c1',
 'Gene9880__orphanReacr1303': 'parent_of_Metab__HMDB0003229_r1',
 'Gene220700002__SLC25A1': 'parent_of_Metab__HMDB0000072_c1',
 'Gene184000001__SLC7A5': 'parent_of_Metab__HMDB0000167_c1',
 'Gene6225__HADHB_reverse': 'parent_of_Metab__CE2434_m1',
 'Gene2366__HADH_reverse': 'parent_of_Metab__CE4790_m1',
 'Gene4097__LDHB_reverse': 'parent_of_Metab__HMDB0000019_c1',
 'Gene5455__ALDH3B1': 'parent

In [85]:
meta_network_compressed = nc.meta_network_cleanup(meta_network_compressed)

In [86]:
len(meta_network_compressed.edges) # R is 18715

12755

In [None]:
tf_regn = dc.get_collectri()
tf_regn

In [None]:
TF_reg_net = tf_regn


In [None]:
RNA_df = pd.DataFrame.from_dict(rna_input, orient='index', columns=['RNA_input'])

reg_meta = moon_res[moon_res.index.isin(TF_reg_net.index)]
reg_meta = reg_meta.join(TF_reg_net)
reg_meta.rename(columns={'score': 'TF_score'}, inplace=True)


In [None]:
reg_meta

In [None]:

reg_meta = pd.merge(reg_meta, RNA_df, left_on='target', right_index=True)
reg_meta['incoherent'] = np.sign(reg_meta['TF_score'] * reg_meta['RNA_input'] * reg_meta['weight']) < 0
reg_meta = reg_meta[reg_meta["incoherent"]==True][['target']]
reg_meta


In [None]:
meta_network_compressed.edges

In [None]:
to_tuple_list = reg_meta.rename_axis("source").reset_index()
tuple_list = list(to_tuple_list.itertuples(index=False, name=None))
tuple_list

In [None]:
meta_network_compressed.get_edge_data('MYC', 'TERT')

In [None]:
filter_incoherent_TF_target(moon_res, tf_regn, meta_network_compressed, rna_input)

In [None]:
# Iterate MOON scoring and incoherent-edge filtering until the network stops
# changing (or MAX_MOON_ITERATIONS is reached).
MAX_MOON_ITERATIONS = 10

# filter_incoherent_TF_target may re-index the regulon frame it receives in
# place, so keep an edge-list version here and pass a fresh copy on every
# iteration.
tf_regn_edges = tf_regn if 'source' in tf_regn.columns else tf_regn.reset_index()

# Dummy, different start values guarantee that the loop body runs at least once.
before = 1
after = 0
i = 0

while before != after and i < MAX_MOON_ITERATIONS:
    # Convergence is measured on edges: the filter removes edges only, so the
    # node count never changes and cannot be used to detect progress.
    before = len(meta_network_compressed.edges)
    moon_res = nc.run_moon_core(sig_input, metab_input, meta_network_compressed, n_layers=100, statistic='wmean')
    meta_network_compressed = filter_incoherent_TF_target(moon_res, tf_regn_edges.copy(), meta_network_compressed, rna_input)
    after = len(meta_network_compressed.edges)
    i += 1

In [None]:
tf_regn

In [None]:
import numpy as np
def filter_incoherent_TF_target(decoupleRnival_res,
                               TF_reg_net,
                               meta_network,
                               RNA_input):
    """
    Remove TF-target edges whose sign disagrees with the data.

    A TF-target interaction is considered incoherent when the sign of
    ``TF score * target RNA value * regulon weight`` is negative. Every
    incoherent (TF, target) pair is removed from ``meta_network``.

    Parameters:
    decoupleRnival_res (pd.DataFrame): Node scores with a 'score' column.
        Node names are read from a 'source' column when present, otherwise
        from the index.
    TF_reg_net (pd.DataFrame): TF regulons with 'target' and 'weight'
        columns. The TF name is read from a 'source' column when present,
        otherwise from the index. This frame is left untouched.
    meta_network (networkx.Graph): Network to filter. Edges are removed
        in place.
    RNA_input (dict): Maps a gene name to its RNA value.

    Returns:
    networkx.Graph: The same ``meta_network`` object, without the
    incoherent TF-target edges.
    """
    # Build derived frames only. Re-indexing TF_reg_net in place (as was done
    # before) made every call after the first one fail with a KeyError.
    if 'source' in TF_reg_net.columns:
        regulons = TF_reg_net[['source', 'target', 'weight']]
    else:
        regulons = TF_reg_net.rename_axis('source').reset_index()[['source', 'target', 'weight']]

    if 'source' in decoupleRnival_res.columns:
        tf_scores = decoupleRnival_res[['source', 'score']].rename(columns={'score': 'TF_score'})
    else:
        tf_scores = (decoupleRnival_res['score']
                     .rename_axis('source')
                     .rename('TF_score')
                     .reset_index())

    RNA_df = pd.DataFrame.from_dict(RNA_input, orient='index', columns=['RNA_input'])

    # Inner merges: only TFs that have a score and targets that have an RNA
    # value can be evaluated.
    reg_meta = regulons.merge(tf_scores, on='source')
    reg_meta = reg_meta.merge(RNA_df, left_on='target', right_index=True)

    coherence = np.sign(reg_meta['TF_score'] * reg_meta['RNA_input'] * reg_meta['weight'])
    incoherent = reg_meta.loc[coherence < 0, ['source', 'target']]
    incoherent_edges = list(incoherent.itertuples(index=False, name=None))

    # An empty list is a valid input here, so no special-casing is needed when
    # nothing is incoherent.
    n_edges_before = len(meta_network.edges)
    meta_network.remove_edges_from(incoherent_edges)
    n_removed = n_edges_before - len(meta_network.edges)
    print(f"MOON: {n_removed} incoherent TF-target edges removed")

    return meta_network

In [None]:
# Expose the node names (currently the index) as a 'source' column.
# The guard keeps the cell idempotent: re-running the unguarded version would
# insert a fresh positional 'index' column and rename it, yielding two
# 'source' columns. rename_axis also covers an index that already has a name.
if 'source' not in moon_res.columns:
    moon_res = moon_res.rename_axis('source').reset_index()


In [None]:
compression_dict = {"node_signatures": signatures, "duplicated_signatures": dup_parents}

In [None]:
meta_network_compressed.nodes

In [None]:
moon_res = nc.decompress_moon_result(moon_res, compression_dict, meta_network_compressed)

In [None]:
import numpy as np
def reduce_solution_network(decoupleRnival_res, meta_network, cutoff, sig_input, RNA_input=None):
    """
    Reduce the network to the edges that are consistent with the node scores.

    Steps:
      1. Keep the nodes whose absolute score is strictly above ``cutoff``.
      2. Keep the edges between those nodes whose 'sign' attribute equals the
         sign of the product of the two node scores. Kept edges carry that
         sign in an 'interaction' attribute.
      3. Among the ``sig_input`` nodes, keep as upstream nodes those whose
         input value and score have the same sign and that are still present
         in the network, and pass them to ``nc.keep_controllable_neighbours``.

    Parameters:
    decoupleRnival_res (pd.DataFrame): Node scores with 'source' (node name)
        and 'score' columns.
    meta_network (networkx.DiGraph): Network whose edges carry a 'sign'
        attribute. It is not modified.
    cutoff (float): Absolute score threshold (exclusive).
    sig_input (dict): Maps an upstream input node to its measured value.
    RNA_input (dict, optional): Currently unused; kept so that existing calls
        keep working.

    Returns:
    networkx.DiGraph: The result of ``nc.keep_controllable_neighbours`` on the
    sign-consistent network.
    """
    above_cutoff = decoupleRnival_res['score'].abs() > cutoff
    scored_nodes = decoupleRnival_res[above_cutoff].set_index('source')
    consistency_vec = scored_nodes['score'].to_dict()

    # Restrict the network that was passed in. The previous version read the
    # notebook-level `meta_network_compressed` here and ignored the argument.
    candidate_edges = meta_network.subgraph(
        [node for node in meta_network.nodes if node in consistency_vec]
    ).edges(data=True)

    res_network = nx.DiGraph()
    for source, target, data in candidate_edges:
        if data['sign'] == np.sign(consistency_vec[source] * consistency_vec[target]):
            res_network.add_edge(source, target, interaction=data['sign'])

    # Input nodes without a score above the cutoff get NaN after the left join
    # and therefore never compare equal, i.e. they are dropped.
    sig_input_df = pd.DataFrame.from_dict(sig_input, orient='index', columns=['real_score'])
    merged_df = sig_input_df.join(scored_nodes['score'], how='left')
    same_sign = np.sign(merged_df['real_score']) == np.sign(merged_df['score'])
    upstream_nodes = {node: 1 for node in merged_df.index[same_sign] if node in res_network.nodes}

    res_network = nc.keep_controllable_neighbours(upstream_nodes, res_network)

    return res_network

In [None]:
sol_network = reduce_solution_network(moon_res, meta_network_compressed, 0, sig_input, rna_input)

In [None]:
translated_res = translate_res(sol_network, hmdb_mapper_vec)

In [None]:
translated_res.nodes

In [None]:
sol_network.nodes()

In [None]:
# Index the regulons by TF. The guard makes the cell safe to re-run: once
# 'source' has become the index, a second set_index would raise a KeyError.
if 'source' in tf_regn.columns:
    tf_regn.set_index('source', inplace=True, drop=True)
RNA_df = pd.DataFrame.from_dict(rna_input, orient='index', columns=['RNA_input'])


In [None]:

reg_meta = moon_res[moon_res.index.isin(tf_regn.index)]
reg_meta = reg_meta.join(tf_regn)
reg_meta.rename(columns={'score': 'TF_score'}, inplace=True)

reg_meta = pd.merge(reg_meta, RNA_df, left_on='target', right_index=True)

In [None]:
import numpy as np

In [None]:
reg_meta

In [None]:
import re

def translate_res(graph, HMDB_mapper_vec=None):
    """
    Return a copy of ``graph`` with translated node names.

    Each node name is rewritten as follows:
      1. every "Metab__" occurrence is removed;
      2. a leading "Gene" becomes "Enzyme";
      3. a trailing "_<lowercase letter>" suffix is removed;
      4. the name is replaced by ``HMDB_mapper_vec[name]`` when it is a key of
         the mapper;
      5. names that still start with "HMDB" get the "Metab__" prefix back.

    Nodes that end up with the same name become a single node in the result
    (NOTE(review): this relies on networkx.relabel_nodes merging them; confirm
    that collapsing e.g. the "_c" and "_m" variants of a metabolite is wanted).

    Parameters:
    graph (networkx.Graph): Network whose node names are strings.
    HMDB_mapper_vec (dict, optional): Maps identifiers to display names.
        With None (the default) step 4 is skipped.

    Returns:
    networkx.Graph: A relabelled copy; ``graph`` itself is not modified.
    """
    # The default used to crash on `x in None`; treat it as an empty mapping.
    mapper = {} if HMDB_mapper_vec is None else HMDB_mapper_vec

    def translate_node(name):
        name = re.sub("Metab__", "", name)
        name = re.sub("^Gene", "Enzyme", name)
        name = re.sub("_[a-z]$", "", name)
        name = mapper[name] if name in mapper else name
        return "Metab__" + name if name.startswith("HMDB") else name

    # One relabelling pass instead of five successive graph copies.
    return nx.relabel_nodes(graph, translate_node)


In [None]:
import pandas as pd

example_SIF = pd.DataFrame({
    'source': ['GPX1', 'Gene863__GPX1'],
    'target': ['Gene863__GPX1', 'Metab__HMDB0003337_c'],
    'sign': [1, 1]
})

example_ATT = pd.DataFrame({
    'Nodes': ['GPX1', 'Gene863__GPX1', 'Metab__HMDB0003337_c'],
    'sign': [1, 1, 1]
})


In [None]:
hmdb_mapper_vec = pd.read_csv("../data/moon/hmdb_mapper_vec.tsv", sep="\t")

In [None]:
hmdb_mapper_vec = hmdb_mapper_vec.set_index('hmdb_id').name.to_dict()