In [1]:
import networkx as nx
import os
from pathlib import Path

ppi_networks_dir = Path("/home/timgarrels/Projects/masterthesis/datasets") / "some_PPI_networks"


In [2]:
def parse_ppi_tsv(tsv):
    return nx.read_edgelist(
        tsv,
        delimiter="\t",
        data=[
            ("node1_string_id", str),
            ("node2_string_id", str),
            ("neighborhood_on_chromosome", float),
            ("gene_fusion", float),
            ("phylogenetic_cooccurrence", float),
            ("homology", float),
            ("coexpression", float),
            ("experimentally_determined_interaction", float),
            ("database_annotated", float),
            ("automated_textmining", float),
            ("combined_score", float),
        ],
    )

In [3]:
networks = {
    n: parse_ppi_tsv(ppi_networks_dir / n)
    for n in os.listdir(ppi_networks_dir)
    if "cancer" in n or "brain" in n
}

In [4]:
networks

{'human_cancer_string_interactions_short.tsv': <networkx.classes.graph.Graph at 0x7f4e9cfc1ab0>,
 'human_brain_development_string_interactions_short.tsv': <networkx.classes.graph.Graph at 0x7f4e9cf73580>}

In [5]:
def get_node_mapping(g, shift:int = 1):
    return {n: i + 1 for i, n in enumerate(g.nodes)}


In [6]:
mapping = get_node_mapping(networks['human_cancer_string_interactions_short.tsv'])
assert len(list(mapping.values())) == len(set(mapping.values())), "Mapping not unique!"

reverse_lookup = {v: k for k, v in mapping.items()}

reverse_lookup[77], reverse_lookup[130], reverse_lookup[39]

('EML4', 'ESR1', 'MECOM')

In [7]:
mapping = get_node_mapping(networks['human_brain_development_string_interactions_short.tsv'])
assert len(list(mapping.values())) == len(set(mapping.values())), "Mapping not unique!"

reverse_lookup = {v: k for k, v in mapping.items()}

[reverse_lookup[int(n)] for n in {'104', '110', '145', '157', '31', '39', '77', '94'}]

['FGF13', 'SLIT2', 'BCR', 'CDH2', 'RRM1', 'SIRT2', 'GLI1', 'OXCT1']

In [8]:
brain_mapping = get_node_mapping(networks['human_brain_development_string_interactions_short.tsv'])
brain_reverse_lookup = {v: k for k, v in brain_mapping.items()}
cancer_mapping = get_node_mapping(networks['human_cancer_string_interactions_short.tsv'])
cancer_reverse_lookup = {v: k for k, v in cancer_mapping.items()}

In [9]:
datasets = Path("/home/timgarrels/Projects/masterthesis/datasets")

cancer_graph = nx.read_edgelist(datasets / "human_cancer_cutoff_0.935.edgelist")
brain_graph = nx.read_edgelist(datasets / "human_brain_development_cutoff_0.772.edgelist")

In [10]:
real_cancer_nodes = [cancer_reverse_lookup[int(n)] for n in cancer_graph.nodes]
real_brain_nodes = [brain_reverse_lookup[int(n)] for n in brain_graph.nodes]

In [11]:
len(set(real_cancer_nodes).intersection(real_brain_nodes))

68

In [12]:
len(set(real_cancer_nodes) - set(real_brain_nodes))

417

In [13]:
len(set(real_brain_nodes) - set(real_cancer_nodes))

471