In [1]:
import networkcommons as nc
import decoupler as dc
import pandas as pd
import networkx as nx

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load the network from its SIF file via networkcommons.
meta_network = nc.read_network_from_file("../data/moon/meta_network.sif")

In [3]:
# Show the object returned by the loader.
meta_network

<networkx.classes.digraph.DiGraph at 0x7f84832faa10>

In [4]:
# Run the networkcommons cleanup step on the loaded network and bind the
# result to a new name.
meta_network_post = nc.meta_network_cleanup(meta_network)

In [5]:
# Show the object returned by the cleanup step.
meta_network_post

<networkx.classes.digraph.DiGraph at 0x7f84832cf1f0>

In [6]:
def _load_value_column(tsv_path):
    """Read a tab-separated table and return its ``value`` column as a dict.

    The first column of the file is used as the index, so it supplies the
    dictionary keys; the first row is taken as the header.
    """
    table = pd.read_csv(tsv_path, sep="\t", header=0, index_col=0)
    return table["value"].to_dict()


# The three input tables share one layout, so they go through the same loader.
sig_input = _load_value_column("../data/moon/cosmos_sig_input.tsv")
rna_input = _load_value_column("../data/moon/cosmos_rna_input.tsv")
metab_input = _load_value_column("../data/moon/cosmos_metab_input.tsv")

In [7]:
# Pass the metabolite scores through prepare_metab_inputs together with the
# codes "c" and "m" (the recorded log line reads "Adding compartment codes.").
# NOTE(review): this rebinds `metab_input` to its own transformed value, so
# re-running the cell feeds already-prepared input back in — confirm that is
# harmless, or restart the kernel before re-running.
metab_input = nc.prepare_metab_inputs(metab_input, ["c", "m"])

Adding compartment codes.


In [8]:
# Filter the network against the identifiers present in `rna_input`
# (the recorded log reports "removing unexpressed nodes from PKN").
# The return value is not bound to a name.
# NOTE(review): the recorded repr shows the same object address as the
# earlier `meta_network_post` display, which suggests the graph is modified
# in place — confirm before relying on it.
nc.filter_pkn_expressed_genes(rna_input.keys(), meta_network_post)

MOON: removing unexpressed nodes from PKN...
MOON: 13092 nodes removed


<networkx.classes.digraph.DiGraph at 0x7f84832cf1f0>

In [9]:
# Node count of the network after the expression filter.
meta_network_post.number_of_nodes()

23323

In [10]:
# Filter `sig_input` against the current network; the recorded log reports
# how many input/measured nodes are no longer in the PKN.
sig_input = nc.filter_input_nodes_not_in_pkn(sig_input, meta_network_post)


COSMOS: 17 input/measured nodes are not inPKN anymore: {removed_nodes}


In [11]:
# Apply keep_controllable_neighbours with `sig_input` and bind the resulting
# network to a new name.
meta_network_post_post = nc.keep_controllable_neighbours(sig_input, meta_network_post)

In [12]:
# Node count after the controllable-neighbour step.
meta_network_post_post.number_of_nodes()

11333

In [13]:
# Filter `metab_input` against the reduced network; the recorded log reports
# how many input/measured nodes are no longer in the PKN.
metab_input = nc.filter_input_nodes_not_in_pkn(metab_input, meta_network_post_post)

COSMOS: 154 input/measured nodes are not inPKN anymore: {removed_nodes}


In [14]:
# Apply keep_observable_neighbours with `metab_input` and bind the resulting
# network to a new name.
meta_network_post_post_post = nc.keep_observable_neighbours(metab_input, meta_network_post_post)

In [15]:
# Filter `sig_input` again, this time against the network left after the
# observable-neighbour step.
sig_input = nc.filter_input_nodes_not_in_pkn(sig_input, meta_network_post_post_post)

COSMOS: 7 input/measured nodes are not inPKN anymore: {removed_nodes}


In [16]:
# Compress the network; the call returns three values. `signatures` and
# `dup_parents` are kept because a later cell packs them into
# `compression_dict` for nc.decompress_moon_result.
meta_network_compressed, signatures, dup_parents = nc.compress_same_children(meta_network_post_post_post, sig_input, metab_input)

In [17]:
# Run the cleanup step on the compressed network.
# NOTE(review): rebinds `meta_network_compressed` to its own cleaned value, so
# the pre-cleanup graph is no longer reachable by name.
meta_network_compressed = nc.meta_network_cleanup(meta_network_compressed)

In [18]:
# Node count of the compressed, cleaned network.
meta_network_compressed.number_of_nodes()

5089

In [19]:
# Fetch the CollecTRI table through decoupler; the recorded output shows the
# columns source, target, weight and PMID.
tf_regn = dc.get_collectri()
tf_regn

Unnamed: 0,source,target,weight,PMID
0,MYC,TERT,1,10022128;10491298;10606235;10637317;10723141;1...
1,SPI1,BGLAP,1,10022617
2,SMAD3,JUN,1,10022869;12374795
3,SMAD4,JUN,1,10022869;12374795
4,STAT5A,IL2,1,10022878;11435608;17182565;17911616;22854263;2...
...,...,...,...,...
43173,NFKB,hsa-miR-143-3p,1,19472311
43174,AP1,hsa-miR-206,1,19721712
43175,NFKB,hsa-miR-21-5p,1,20813833;22387281
43176,NFKB,hsa-miR-224-5p,1,23474441;23988648


In [20]:
# Alternate MOON scoring with the TF-target filter until the node count stops
# changing, giving up after at most ten rounds.
for _round in range(10):
    before = len(meta_network_compressed.nodes)
    moon_res = nc.run_moon_core(
        sig_input,
        metab_input,
        meta_network_compressed,
        n_layers=100,
        statistic='wmean',
    )
    meta_network_compressed = nc.filter_incohrent_TF_target(
        moon_res, tf_regn, meta_network_compressed, rna_input
    )
    after = len(meta_network_compressed.nodes)
    if before == after:
        # The filter removed nothing in this round: the network is stable.
        break

In [25]:
# Move the index of `moon_res` into a regular column called "source".
# The guard makes the cell safe to re-run: without it, a second execution
# would push the fresh integer index into another "index" column and rename
# it to a duplicate "source".
# NOTE(review): the execution counts around this cell are out of order
# (25 appears before 21), so verify the intended position with Restart & Run All.
if "source" not in moon_res.columns:
    moon_res = moon_res.reset_index().rename(columns={'index': 'source'})


In [21]:
# Bundle the two by-products of the compression step for decompress_moon_result.
compression_dict = dict(
    node_signatures=signatures,
    duplicated_signatures=dup_parents,
)

In [22]:
# Display the node view of the compressed network (the recorded output is long
# and truncated).
meta_network_compressed.nodes

NodeView(('SPHK2', 'HDAC2', 'HDAC1', 'parent_of_Metab__sphs1p_c1', 'parent_of_Metab__HMDB0001383_c1', 'Metab__trpglugly_c', 'Gene7387__orphanReacTRPGLUGLYr', 'EGFR', 'PIK3CA', 'PIK3R2', 'CAV1', 'PLCG1', 'CYTH2', 'FRS2', 'VAV2', 'EPS15', 'TNK2', 'CTNNB1', 'HGS', 'STAT1', 'PLD2', 'PTPN1', 'PCNA', 'ARF6', 'PRKDC', 'GSTP1', 'ERBB2', 'CTNND1', 'CRK', 'BCAR1', 'HDAC6', 'EZR', 'STAT5B', 'PTK2', 'GRB2', 'NCK1', 'NCK2', 'SOS1', 'PLCG2', 'CBL', 'CCND1', 'E2F1', 'STAT3', 'CRKL', 'ERRFI1', 'SHC1', 'MAP2K1', 'parent_of_HRAS1_____KRAS1', 'parent_of_MAPK81', 'parent_of_CSK1', 'parent_of_MARCKS-1_____PPP3R11_____CAMK11_____CAMKK21_____PPP3CB1_____RIN11_____NRAS-1_____CAMK2G1_____UNC13B1_____ATP2B11_____RAC11_____PPP3CA1_____IQGAP11', 'parent_of_ACTR21_____ACTB1', 'Gene7400__orphanReacTRPVALASPr', 'Metab__HMDB0000883_c', 'Metab__HMDB0000191_c', 'Metab__HMDB0000929_c', 'Gene359__DEGS1', 'Metab__HMDB0001197_c', 'Metab__crm_hs_c', 'Gene207__BTD', 'Metab__HMDB0000182_m', 'Metab__HMDB0000030_m', 'Metab__tyr

In [26]:
# Decompress the MOON result using the signatures collected during compression.
# NOTE(review): rebinds `moon_res`, so re-running this cell passes an already
# decompressed result back in — confirm that is harmless.
moon_res = nc.decompress_moon_result(moon_res, compression_dict, meta_network_compressed)


In [28]:
# Reduce the compressed network to a solution network; the third positional
# argument is the literal 0.
# NOTE(review): the recorded output of `sol_network.nodes` in the next cell is
# an empty NodeView — check the inputs and the 0 argument if that is unexpected.
sol_network = nc.reduce_solution_network(moon_res, meta_network_compressed, 0, sig_input, rna_input)

In [29]:
# Display the nodes of the solution network.
sol_network.nodes

NodeView(())

In [26]:
# Index the TF table by its "source" column so it can be joined on TF name.
# The guard keeps the cell re-runnable: once "source" has become the index it
# is no longer a column, and an unconditional set_index would raise KeyError.
# NOTE(review): after this cell `tf_regn` no longer has a "source" column;
# earlier cells that pass `tf_regn` to networkcommons received the original
# layout — confirm before re-running them in the same kernel.
if 'source' in tf_regn.columns:
    tf_regn = tf_regn.set_index('source', drop=True)

# One-column frame of the RNA scores, indexed by the keys of `rna_input`.
RNA_df = pd.DataFrame.from_dict(rna_input, orient='index', columns=['RNA_input'])


In [27]:

# Build the TF/target table in one chain:
#   1. keep the MOON rows whose index label also occurs in the TF table index,
#   2. attach the TF table columns by index,
#   3. rename the MOON score column to "TF_score",
#   4. attach the RNA score of each row's target.
in_tf_table = moon_res.index.isin(tf_regn.index)
reg_meta = (
    moon_res.loc[in_tf_table]
    .join(tf_regn)
    .rename(columns={'score': 'TF_score'})
    .merge(RNA_df, left_on='target', right_index=True)
)

In [28]:
import numpy as np

In [30]:
# Display the joined TF/target table.
reg_meta

Unnamed: 0,TF_score,level,target,weight,PMID,RNA_input
ENO1,-0.608805,2,MYC,-1,10082554;10403782;10528225;10681589;10802057;1...,-0.658955
ENO1,-0.608805,2,BCL2L1,-1,11973636,-0.179341
ENO1,-0.608805,2,CASP3,-1,11973636,-0.558814
ENO1,-0.608805,2,HSPA4,-1,15794924,0.474568
ENO1,-0.608805,2,JUN,-1,15794924,0.884750
...,...,...,...,...,...,...
SFPQ,-0.144298,8,EPHX1,1,23714182,-0.305495
SFPQ,-0.144298,8,ACTB,1,23756766,0.946368
SFPQ,-0.144298,8,HSPA9,1,8473291,-0.217217
SFPQ,-0.144298,8,NR3C1,1,19339282,0.236506


In [None]:
# Cell-level walk through a layered decoupler scoring run over
# `meta_network_compressed`: score the sources that point at the measured
# nodes, then repeatedly score the sources that point at the previous layer.
#
# NOTE(review): `downstream_input`, `upstream_input`, `statistic`, `n_perm`,
# `n_layers` and `downstream_cutoff` are not assigned in any cell visible in
# this notebook; they must already exist in the kernel before this cell runs.
# `np` comes from the separate `import numpy as np` cell.


def _score_layer(mat, net):
    """Score the sources of ``net`` against ``mat`` with the selected statistic.

    Reads the notebook-level names ``statistic`` and ``n_perm`` at call time.

    Parameters
    ----------
    mat : pandas.DataFrame
        Matrix handed to decoupler as ``mat``.
    net : pandas.DataFrame
        Edge table handed to decoupler as ``net``.

    Returns
    -------
    The estimate returned by ``dc.run_wmean`` (its ``norm`` output when
    ``statistic == "norm_wmean"``) or by ``dc.run_ulm``.

    Raises
    ------
    ValueError
        If ``statistic`` selects neither branch. Without this check the cell
        would carry on with an undefined ``estimate``, or with a stale one
        left in the kernel by another cell.
    """
    if "wmean" in statistic:
        estimate, norm, corr, pvals = dc.run_wmean(
            mat=mat,
            net=net,
            times=n_perm,
            weight=None,
            min_n=1
        )
        return norm if statistic == "norm_wmean" else estimate
    if statistic == "ulm":
        estimate, pvals = dc.run_ulm(
            mat=mat,
            net=net,
            weight=None,
            min_n=1
        )
        return estimate
    raise ValueError(
        f"Unsupported statistic {statistic!r}: expected a name containing 'wmean' or 'ulm'."
    )


# Edge list of the network; decoupler expects the sign column to be named "mor".
regulons = nx.to_pandas_edgelist(meta_network_compressed)
regulons.rename(columns={"sign": "mor"}, inplace=True)
# Measured nodes are never scored as sources.
regulons = regulons[~regulons["source"].isin(downstream_input.keys())]

# Single-row matrix: one column per measured node.
decoupler_mat = pd.DataFrame(
    list(downstream_input.values()),
    index=downstream_input.keys()).T

estimate = _score_layer(decoupler_mat, regulons)

# Level 1: sources scored directly from the measured nodes.
n_plus_one = estimate.T
n_plus_one.columns = ["score"]
n_plus_one["level"] = 1

res_list = [n_plus_one]
i = 1
# Keep going while edges remain, more than one of them targets the previous
# layer, and the layer budget is not exhausted.
while len(regulons) > 1 and \
        regulons["target"].isin(res_list[i - 1].index.values).sum() > 1 and \
        i <= n_layers:

    # Nodes scored in the previous layer are not scored again as sources.
    regulons = regulons[~regulons["source"].isin(res_list[i - 1].index.values)]
    # The previous layer's scores become the input matrix for this layer.
    previous_n_plus_one = res_list[i - 1].drop(columns="level").T

    estimate = _score_layer(previous_n_plus_one, regulons)

    n_plus_one = estimate.T
    # Drop the freshly scored sources before the loop condition is re-evaluated.
    regulons = regulons[~regulons["source"].isin(n_plus_one.index.values)]
    n_plus_one["level"] = i + 1
    res_list.append(n_plus_one)
    i += 1

recursive_decoupleRnival_res = pd.concat(res_list)

# Level 0: the measured nodes themselves, kept only above the absolute cutoff.
downstream_names = pd.DataFrame.from_dict(downstream_input, orient="index", columns=["score"])
downstream_names = downstream_names[abs(downstream_names["score"]) > downstream_cutoff]
downstream_names["level"] = 0

recursive_decoupleRnival_res = pd.concat([recursive_decoupleRnival_res, downstream_names])

if upstream_input is not None:
    # Keep rows whose computed score has the same sign as the supplied upstream
    # value, plus rows that have no upstream value at all.
    upstream_input_df = pd.DataFrame.from_dict(upstream_input, orient="index", columns=["real_score"])
    upstream_input_df = upstream_input_df.join(recursive_decoupleRnival_res, how='right')
    upstream_input_df = upstream_input_df[(np.sign(upstream_input_df["real_score"]) == np.sign(upstream_input_df["score"])) | (np.isnan(upstream_input_df["real_score"]))]
    recursive_decoupleRnival_res = upstream_input_df.drop(columns="real_score")


In [None]:
# Display the combined per-level score table.
recursive_decoupleRnival_res

Unnamed: 0,score,level
Gene1000000003__SLC7A6,0.354939,1
Gene10001__SLC7A6_TRANSPORTER2,0.354939,1
Gene1000200002__SLC7A6,0.354939,1
Gene1000300003__SLC7A6,0.354939,1
Gene1000400002__SLC7A6,0.354939,1
...,...,...
Metab__HMDB0000167_m,-0.604629,0
Metab__HMDB0000725_m,0.158506,0
Metab__HMDB0000158_m,-0.137388,0
Metab__HMDB0000294_m,0.822585,0


In [None]:
# Rebuild the decoupler inputs on their own for step-by-step debugging.
# NOTE(review): `downstream_input` is not assigned in any cell visible here.
edge_table = nx.to_pandas_edgelist(meta_network_compressed)
edge_table = edge_table.rename(columns={"sign": "mor"})
# Measured nodes are excluded as sources.
source_is_measured = edge_table["source"].isin(downstream_input.keys())
regulons = edge_table[~source_is_measured]

# Single-row matrix: one column per measured node.
measured_scores = pd.DataFrame(
    list(downstream_input.values()),
    index=downstream_input.keys(),
)
decoupler_mat = measured_scores.transpose()

In [None]:
import numpy as np

# Does the decoupler input contain any infinite entries?
inf_values = np.isinf(decoupler_mat).to_numpy().any()

# Does it contain any NaN entries?
nan_values = np.isnan(decoupler_mat).to_numpy().any()

inf_values, nan_values


(False, False)

In [None]:
# Run decoupler's ULM directly on the manually built inputs, outside
# run_moon_core, keeping both returned values.
estimate, pvals = dc.run_ulm(
        mat=decoupler_mat,
        net=regulons,
        weight=None,
        min_n=1
    )

In [None]:
# Run MOON without a `statistic` argument (the earlier loop passed 'wmean').
# NOTE(review): the recorded output of this cell is a ValueError saying `mat`
# contains non-finite values, so `moon_res` is not assigned here.
moon_res = nc.run_moon_core(sig_input, metab_input, meta_network_compressed, n_layers=100)



ValueError: mat contains non finite values (nan or inf), please set them to 0 or remove them.

In [26]:
# Apply the TF-target filter to the current MOON result; the return value is
# not bound to a name.
# NOTE(review): the recorded output is "NameError: name 'moon_res' is not
# defined" — this cell needs a successful run_moon_core result in the kernel.
nc.filter_incohrent_TF_target(moon_res, tf_regn, meta_network_compressed, rna_input)

NameError: name 'moon_res' is not defined