# NetworkX Phenotypic Comparison

In [5]:
import os
import networkx as nx
import plotly.graph_objects as go
from rarecrowds import PatientSampler
from rarecrowds import PhenotypicComparison
from rarecrowds.utils.hpo import Hpo
from pronto import Ontology
from rarecrowds.utils.ontograph import OntoGraph

## Build the graph

In [2]:
sampler = PatientSampler()
patients = sampler.sample(patient_params="default", N=5)

In [16]:
class HpoOntologyGraph(OntoGraph):
    def __init__(self):
        self.purl = "http://purl.obolibrary.org/obo/hp.obo"
        cwd = os.getcwd()
        filename = os.path.join(cwd, "resources", "hp.pkl")
        super().__init__(filename)
        self.Graph = self._build_graph(Ontology(self.purl))
        self.root = [nd for nd, d in self.Graph.in_degree() if d == 0][0]
        # if update:
        #     super().save(_pkl_path)

    def _add_node(self, G, id, term):
        # G.add_node(id, name=term.name, desc=str(term.definition), comment=self._parse_comment(term), synonyms=self._parse_synonyms(term))
        G.add_node(id, id=id, label=term.name)

    def _add_edge(self, G, id, term):
        for sub in term.subclasses(1):
            if sub.id != term.id:
                G.add_edge(term.id, sub.id)

    def simplify(self, ids):
        all_preds = self.predecessors(ids, 1000)
        res = set(ids) - set(all_preds)
        return res

In [17]:
#hpo = Hpo(update=True)
hpo = HpoOntologyGraph()

# hp_obo_url = "http://purl.obolibrary.org/obo/hp.obo"
# hpo = Ontology(hp_obo_url)

patient = patients["ORPHA:324"]["cohort"][0]["phenotype"]
disease = {
    "name": patients["ORPHA:324"]["name"],
    "id": patients["ORPHA:324"]["id"],
    "phenotype": patients["ORPHA:324"]["phenotype"],
}


  self.Graph = self._build_graph(Ontology(self.purl))


In [21]:
def prepare_data_ok(G, hpo, disease_set):
    mapping = {n: n.replace(":", "_") for n in G.nodes}
    G = nx.relabel_nodes(G, mapping)
    pos = nx.drawing.nx_pydot.graphviz_layout(G, prog="dot")
    data = {
        "edges": {"x": [], "y": []},
        "preds": {"x": [], "y": [], "labels": []},
        "phens": {"x": [], "y": [], "labels": []},
    }
    for edge in G.edges:
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        data["edges"]["x"].append(x0)
        data["edges"]["x"].append(x1)
        data["edges"]["x"].append(None)
        data["edges"]["y"].append(y0)
        data["edges"]["y"].append(y1)
        data["edges"]["y"].append(None)
    for node in G.nodes:
        x, y = pos[node]
        label = hpo[node.replace("_", ":")]
        # print("label", label)
        # print("label", label.id)
        label = f"{label.id}: {label.name}"
        if node.replace("_", ":") in disease_set:
            data["phens"]["x"].append(x)
            data["phens"]["y"].append(y)
            data["phens"]["labels"].append(label)
        else:
            data["preds"]["x"].append(x)
            data["preds"]["y"].append(y)
            data["preds"]["labels"].append(label)
    return data

In [19]:
hpo_set = set()
## Get patient set
patient_set = set(patient)
hpo_set = hpo_set.union(
    patient_set.union(set(hpo.predecessors(list(patient_set), 1000)))
)
## Get disease set
disease_set = set(disease)
hpo_set = hpo_set.union(
    disease_set.union(set(hpo.predecessors(list(disease_set), 1000)))
)
## Get subgraph
hpo_set.remove("HP:0000001")
G = hpo.Graph.subgraph(list(hpo_set))
G.nodes()

NodeView(('HP:0011025', 'HP:0002944', 'HP:0012531', 'HP:0000077', 'HP:0000925', 'HP:0010674', 'HP:0033127', 'HP:0009121', 'HP:0025032', 'HP:0011354', 'HP:0000079', 'HP:0012211', 'HP:0012443', 'HP:0025142', 'HP:0033354', 'HP:0012372', 'HP:0002027', 'HP:0025276', 'HP:0000119', 'HP:0011842', 'HP:0000478', 'HP:0001877', 'HP:0004328', 'HP:0020129', 'HP:0001939', 'HP:0011458', 'HP:0002650', 'HP:0001903', 'HP:0100659', 'HP:0025031', 'HP:0000924', 'HP:0002011', 'HP:0012639', 'HP:0001131', 'HP:0001574', 'HP:0001297', 'HP:0011276', 'HP:0000951', 'HP:0011277', 'HP:0004626', 'HP:0002597', 'HP:0000118', 'HP:0001626', 'HP:0003110', 'HP:0010935', 'HP:0001871', 'HP:0011121', 'HP:0000707', 'HP:0000481'))

In [23]:
plt_data = prepare_data_ok(G, patient_set, disease_set)

"dot" with args ['-Tdot', '/tmp/tmp_k75y4ot'] returned code: 1

stdout, stderr:
 b''
b"Error: /tmp/tmp_k75y4ot: syntax error in line 2 near ':'\n"



AssertionError: 1