In [1]:
import networkx as nx
import os
from pathlib import Path

# Root directory of the datasets. Set the PPI_DATASETS_DIR environment variable
# to run this notebook on another machine; when it is unset, the original
# location is used unchanged.
ppi_networks_dir = (
    Path(os.environ.get("PPI_DATASETS_DIR", "/home/timgarrels/masterthesis/datasets"))
    / "some_PPI_networks"
)


In [2]:
def parse_ppi_tsv(tsv):
    """Load a tab-separated edge list into a networkx graph.

    The first two columns of each row name the edge's endpoints. The
    remaining eleven columns are stored as edge attributes: two string ids
    followed by nine float-valued columns.

    Args:
        tsv: Path (or open file handle) of the tab-separated file to read.

    Returns:
        The graph built by ``nx.read_edgelist``.
    """
    id_columns = ("node1_string_id", "node2_string_id")
    float_columns = (
        "neighborhood_on_chromosome",
        "gene_fusion",
        "phylogenetic_cooccurrence",
        "homology",
        "coexpression",
        "experimentally_determined_interaction",
        "database_annotated",
        "automated_textmining",
        "combined_score",
    )

    # Column order matters: read_edgelist pairs the schema positionally
    # with the values that follow the two node columns.
    edge_schema = [(column, str) for column in id_columns]
    edge_schema.extend((column, float) for column in float_columns)

    return nx.read_edgelist(tsv, delimiter="\t", data=edge_schema)

In [3]:
# Parse every file in the PPI directory whose name mentions one of the
# keywords of interest; the resulting graphs are keyed by file name.
networks = {
    file_name: parse_ppi_tsv(ppi_networks_dir / file_name)
    for file_name in os.listdir(ppi_networks_dir)
    if any(keyword in file_name for keyword in ("cancer", "brain"))
}

In [4]:
# Display the loaded graphs, keyed by their source file name.
networks

{'human_cancer_string_interactions_short.tsv': <networkx.classes.graph.Graph at 0x7f97ccda3d00>,
 'human_brain_development_string_interactions_short.tsv': <networkx.classes.graph.Graph at 0x7f97ccda3880>}

In [5]:
def get_node_mapping(g, shift: int = 1):
    """Assign each node of ``g`` a consecutive integer id.

    Nodes are numbered in the iteration order of ``g.nodes``; the first node
    receives ``shift``, the second ``shift + 1``, and so on.

    Args:
        g: Object exposing an iterable ``nodes`` attribute (e.g. a networkx
            graph).
        shift: Id assigned to the first node. Defaults to 1, i.e. 1-based ids.

    Returns:
        Dict mapping each node to its integer id.
    """
    # Add `shift` rather than a literal 1 so the parameter actually takes effect.
    return {node: index + shift for index, node in enumerate(g.nodes)}


In [8]:
# Number the nodes of the cancer network and check that no id is used twice.
mapping = get_node_mapping(networks["human_cancer_string_interactions_short.tsv"])
assert len(set(mapping.values())) == len(mapping), "Mapping not unique!"

# Invert the mapping so integer ids can be translated back to node names.
reverse_lookup = dict(zip(mapping.values(), mapping.keys()))

# Spot-check a few ids.
tuple(reverse_lookup[node_id] for node_id in (145, 77, 31))

('AFF1', 'EML4', 'CREBBP')

In [7]:
# Number the nodes of the brain-development network and check that no id is
# used twice.
mapping = get_node_mapping(
    networks["human_brain_development_string_interactions_short.tsv"]
)
assert len(set(mapping.values())) == len(mapping), "Mapping not unique!"

# Invert the mapping so integer ids can be translated back to node names.
reverse_lookup = dict(zip(mapping.values(), mapping.keys()))

# Example lookups (currently disabled):
# reverse_lookup[405], reverse_lookup[103]