# Customizing NetworkX Graphs

In [1]:
#%%capture
!pip install rarecrowds
!pip install networkx==2.5.1
!pip install pronto==2.4.1
!pip install plotly==4.14.3
!pip install pydot==1.4.2

Collecting rarecrowds
[?25l  Downloading https://files.pythonhosted.org/packages/d1/21/e541a05efcc5885981969d17139c45a3d100010ed6823de7b53e8ba5525d/rarecrowds-0.0.5-py3-none-any.whl (25.6MB)
[K     |████████████████████████████████| 25.6MB 38.8MB/s 
[?25hCollecting tqdm==4.47.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/62/7663894f67ac5a41a0d8812d78d9d2a9404124051885af9d77dc526fb399/tqdm-4.47.0-py2.py3-none-any.whl (66kB)
[K     |████████████████████████████████| 71kB 7.0MB/s 
Collecting protobuf==3.15.7
[?25l  Downloading https://files.pythonhosted.org/packages/31/78/cc23cd87d4929bfc9b26a39e94608a0ae71fa61dc44902f111d8ff1be00f/protobuf-3.15.7-cp37-cp37m-manylinux1_x86_64.whl (1.0MB)
[K     |████████████████████████████████| 1.0MB 52.0MB/s 
[?25hCollecting pronto==2.4.1
[?25l  Downloading https://files.pythonhosted.org/packages/f6/92/bc290a3afc93107e8d2c156d9083f3f9c3a0a2a77be610fbbb496708fe76/pronto-2.4.1-py2.py3-none-any.whl (59kB)
[K     |███████████████



In [2]:
import networkx as nx
from pronto import Ontology
import plotly.graph_objects as go
from rarecrowds import PatientSampler

In [3]:
# Load the HPO ontology (OBO format) from its permanent URL into a pronto Ontology.
# `hp` is used as a notebook-wide global by the cells below.
hp_obo_url = "http://purl.obolibrary.org/obo/hp.obo"
hp = Ontology(hp_obo_url)

  


In [4]:
# Terms are looked up by indexing the ontology with their identifier.
hp["HP:0000077"] # Abnormality of the kidney

Term('HP:0000077', name='Abnormality of the kidney')

First 10 terms

In [5]:
# Materialize the term iterator and show only the first ten entries.
list(hp.terms())[:10]

[Term('HP:0000001', name='All'),
 Term('HP:0000002', name='Abnormality of body height'),
 Term('HP:0000003', name='Multicystic kidney dysplasia'),
 Term('HP:0000005', name='Mode of inheritance'),
 Term('HP:0000006', name='Autosomal dominant inheritance'),
 Term('HP:0000007', name='Autosomal recessive inheritance'),
 Term('HP:0000008', name='Abnormal morphology of female internal genitalia'),
 Term('HP:0000009', name='Functional abnormality of the bladder'),
 Term('HP:0000010', name='Recurrent urinary tract infections'),
 Term('HP:0000011', name='Neurogenic bladder')]

## Build the graph

In [6]:
def parse_comment(term):
    """Return the term's comment, or an empty string when the comment is missing/empty."""
    return term.comment or ""

In [7]:
def parse_synonyms(term):
    """Convert each synonym of `term` into a plain dict.

    Every dict has the keys ``label`` (synonym description), ``scope``,
    ``type`` (id of the synonym type, or None when no type is set) and
    ``xrefs`` (list of cross-reference ids, or None when there are none).
    Returns an empty list when the term has no synonyms.
    """
    return [
        {
            "label": entry.description,
            "scope": entry.scope,
            "type": entry.type.id if entry.type else None,
            "xrefs": [ref.id for ref in entry.xrefs] if entry.xrefs else None,
        }
        for entry in term.synonyms
    ]


In [8]:
def add_node(G, id, term):
    """Add (or update) the node `id` in graph `G` with attributes taken from `term`.

    Attributes stored on the node:
      - label:    the term name
      - desc:     the term definition as a string ("" when the term has none)
      - comment:  the term comment ("" when missing)
      - synonyms: list of synonym dicts produced by parse_synonyms()
    """
    definition = term.definition
    G.add_node(
        id,
        label=term.name,
        # str(None) would store the literal text "None"; use an empty string,
        # matching how a missing comment is represented.
        desc=str(definition) if definition is not None else "",
        comment=parse_comment(term),
        synonyms=parse_synonyms(term),
    )


In [9]:
def add_edge(G, term):
    """Add a directed edge from `term` to each subclass returned by ``term.subclasses(1)``."""
    parent_id = term.id
    for child in term.subclasses(1):
        # Skip the term itself if the iterator yields it, so no self-loop is created.
        if child.id == parent_id:
            continue
        G.add_edge(parent_id, child.id)

In [10]:
# Build the directed graph: one node per non-obsolete term and one edge from each
# term to each of its direct subclasses (parent -> child).
# Iterating hp.terms() avoids a second lookup by id and avoids binding a
# notebook-global variable called `id`, which would shadow the builtin.
G = nx.DiGraph()
for term in hp.terms():
    if not term.obsolete:
        add_node(G, term.id, term)
        add_edge(G, term)


In [11]:
# Number of nodes currently in the graph.
len(G.nodes)

15872

In [12]:
# Attribute dict stored on a single node (label, desc, comment, synonyms).
G.nodes["HP:0000078"]

{'comment': '',
 'desc': 'An abnormality of the genital system.',
 'label': 'Abnormality of the genital system',
 'synonyms': [{'label': 'Genital abnormalities',
   'scope': 'EXACT',
   'type': 'layperson',
   'xrefs': None},
  {'label': 'Genital anomalies',
   'scope': 'EXACT',
   'type': 'layperson',
   'xrefs': None},
  {'label': 'Abnormality of the reproductive system',
   'scope': 'RELATED',
   'type': 'layperson',
   'xrefs': None},
  {'label': 'Genital abnormality',
   'scope': 'EXACT',
   'type': 'layperson',
   'xrefs': None},
  {'label': 'Genital defects',
   'scope': 'EXACT',
   'type': 'layperson',
   'xrefs': None}]}

In [13]:
# First ten (parent, child) edges of the graph.
list(G.edges())[:10]

[('HP:0000001', 'HP:0000005'),
 ('HP:0000001', 'HP:0000118'),
 ('HP:0000001', 'HP:0012823'),
 ('HP:0000001', 'HP:0032223'),
 ('HP:0000001', 'HP:0032443'),
 ('HP:0000001', 'HP:0040279'),
 ('HP:0000005', 'HP:0000006'),
 ('HP:0000005', 'HP:0000007'),
 ('HP:0000005', 'HP:0001425'),
 ('HP:0000005', 'HP:0001426')]

In [14]:
# Direct children of HP:0000005, straight from the NetworkX API.
list(G.successors("HP:0000005"))

['HP:0000006',
 'HP:0000007',
 'HP:0001425',
 'HP:0001426',
 'HP:0001427',
 'HP:0001428',
 'HP:0001466',
 'HP:0003743',
 'HP:0003745',
 'HP:0010985',
 'HP:0032113',
 'HP:0032382']

In [15]:
def _predecessors(G, id, depth):
    depth -= 1
    items = set()
    if G.has_node(id):
        for item in G.predecessors(id):
            items.add(item)
            if depth != 0:
                for it in _predecessors(G, item, depth):
                    items.add(it)
    return items

def predecessors(G, ids, depth=1):
    """Return the sorted list of ancestors of one or several nodes.

    `ids` may be a single node id or a list of ids (anything that is not
    exactly a ``list`` is treated as one id). `depth` is forwarded to
    ``_predecessors`` for every id and the per-id results are merged.
    """
    id_list = ids if type(ids) is list else [ids]
    collected = set()
    for node_id in id_list:
        collected.update(_predecessors(G, node_id, depth))
    return sorted(collected)


In [16]:
def _successors(G, id, depth):
    depth -= 1
    items = set()
    for item in G.successors(id):
        items.add(item)
        if depth != 0:
            for it in _successors(G, item, depth):
                items.add(it)
    return items

def successors(G, ids, depth=1):
    """Return the sorted list of descendants of one or several nodes.

    `ids` may be a single node id or a list of ids (anything that is not
    exactly a ``list`` is treated as one id). `depth` is forwarded to
    ``_successors`` for every id and the per-id results are merged.
    """
    id_list = ids if type(ids) is list else [ids]
    collected = set()
    for node_id in id_list:
        collected.update(_successors(G, node_id, depth))
    return sorted(collected)

In [17]:
# Call the single-id helper directly: children of HP:0000005 one edge away, returned as a set.
_successors(G, id="HP:0000005", depth=1)

{'HP:0000006',
 'HP:0000007',
 'HP:0001425',
 'HP:0001426',
 'HP:0001427',
 'HP:0001428',
 'HP:0001466',
 'HP:0003743',
 'HP:0003745',
 'HP:0010985',
 'HP:0032113',
 'HP:0032382'}

In [18]:
# Public wrapper: takes a list of ids and returns a sorted list (depth defaults to 1).
successors(G, ids=["HP:0000005"])

['HP:0000006',
 'HP:0000007',
 'HP:0001425',
 'HP:0001426',
 'HP:0001427',
 'HP:0001428',
 'HP:0001466',
 'HP:0003743',
 'HP:0003745',
 'HP:0010985',
 'HP:0032113',
 'HP:0032382']

In [19]:
# Direct parents (one edge up) of HP:0000005.
predecessors(G, ids=["HP:0000005"], depth=1)

['HP:0000001']

In [20]:
# Every ancestor of HP:0000091 reachable within 10 edges going up the hierarchy.
predecessors(G, ids=["HP:0000091"], depth=10)

['HP:0000001',
 'HP:0000077',
 'HP:0000079',
 'HP:0000118',
 'HP:0000119',
 'HP:0010935',
 'HP:0012210',
 'HP:0012575']

## Plot Disease

In [21]:
# Sample patients (N=5) with the default parameters, then wrap the phenotype of the
# first ORPHA:324 cohort entry in the {"phenotype": {...}} shape used by plot_disease().
sampler = PatientSampler()
patients = sampler.sample(patient_params="default", N=5)
patient = {"phenotype": patients["ORPHA:324"]["cohort"][0]["phenotype"]}
patient

{'phenotype': {'HP:0001131': {},
  'HP:0001446': {},
  'HP:0001510': {},
  'HP:0001712': {},
  'HP:0001877': {},
  'HP:0002027': {},
  'HP:0002637': {},
  'HP:0003336': {},
  'HP:0008969': {},
  'HP:0011276': {},
  'HP:0012211': {},
  'HP:0012638': {},
  'HP:0025276': {}}}

In [22]:
def prepare_data(G, hpo, disease_set):
    """Lay out `G` with Graphviz ``dot`` and collect plot coordinates.

    Parameters
    ----------
    G : NetworkX graph whose node ids are strings (term identifiers).
    hpo : mapping-like object indexed by term id; each entry must expose
        ``.id`` and ``.name`` (used to build hover labels).
    disease_set : container of term ids to highlight.

    Returns
    -------
    dict with three entries:
      - "edges": {"x", "y"} coordinate lists; each edge contributes its two
        end points followed by ``None`` so line segments stay separate.
      - "phens": {"x", "y", "labels"} for nodes whose id is in `disease_set`.
      - "preds": {"x", "y", "labels"} for all remaining nodes.
    """
    # dot rejects ':' in unquoted node names, so the layout runs on a relabelled copy.
    to_dot_id = {node: node.replace(":", "_") for node in G.nodes}
    # Keep the inverse mapping instead of turning every '_' back into ':',
    # which would corrupt ids that legitimately contain an underscore.
    from_dot_id = {dot_id: node for node, dot_id in to_dot_id.items()}
    layout_graph = nx.relabel_nodes(G, to_dot_id)
    pos = nx.drawing.nx_pydot.graphviz_layout(layout_graph, prog="dot")

    data = {
        "edges": {"x": [], "y": []},
        "preds": {"x": [], "y": [], "labels": []},
        "phens": {"x": [], "y": [], "labels": []},
    }

    for edge in layout_graph.edges:
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        data["edges"]["x"].extend([x0, x1, None])
        data["edges"]["y"].extend([y0, y1, None])

    for dot_id in layout_graph.nodes:
        x, y = pos[dot_id]
        term_id = from_dot_id[dot_id]
        term = hpo[term_id]
        label = f"{term.id}: {term.name}"
        bucket = data["phens"] if term_id in disease_set else data["preds"]
        bucket["x"].append(x)
        bucket["y"].append(y)
        bucket["labels"].append(label)
    return data

In [23]:
def scatter_edge_trace(plt_data):
    """Build the thin grey line trace that draws the graph edges (no hover text)."""
    edges = plt_data["edges"]
    line_style = {"width": 0.75, "color": "#888"}
    return go.Scatter(
        x=edges["x"],
        y=edges["y"],
        mode="lines",
        name="HPO links",
        hoverinfo="none",
        line=line_style,
    )


In [24]:
def scatter_pred_trace(plt_data):
    """Build the small grey marker trace for nodes stored under ``plt_data["preds"]``."""
    preds = plt_data["preds"]
    marker_style = {"color": "#888", "size": 5, "line_width": 0}
    return go.Scatter(
        x=preds["x"],
        y=preds["y"],
        text=preds["labels"],
        mode="markers",
        name="Predecessor terms",
        marker=marker_style,
    )


In [25]:
def scatter_terms_trace(plt_data):
    """Build the larger outlined marker trace for nodes stored under ``plt_data["phens"]``."""
    phens = plt_data["phens"]
    marker_style = {"size": 10, "line_width": 1}
    return go.Scatter(
        x=phens["x"],
        y=phens["y"],
        text=phens["labels"],
        mode="markers",
        name="Input terms",
        marker=marker_style,
    )


In [26]:
def get_title(disease):
    """Build the (HTML) figure title for a disease description.

    Parameters
    ----------
    disease : mapping read through ``.get``; the optional keys ``"name"`` and
        ``"id"`` are used. ``"id"`` is expected to look like ``"PREFIX:local"``.

    Returns
    -------
    str starting with "HPO comparison". When a name is present,
    " of a <name> patient" is appended. When an id is present it is appended
    in parentheses, wrapped in a hyperlink for ORPHA, OMIM and MONDO ids.
    Ids from any other source, or ORPHA/OMIM ids without a ``:local`` part,
    are appended as plain text instead of raising.
    """
    name = disease.get("name")
    code = disease.get("id")
    title = "HPO comparison"
    if name:
        title += f" of a {name} patient"
    if code:
        prefix = code.lower()
        parts = code.split(":")
        local_id = parts[1] if len(parts) > 1 else ""
        link = None
        if "orpha" in prefix:
            if local_id:
                link = (
                    "http://www.orpha.net/consor/cgi-bin/OC_Exp.php?lng=en&Expert="
                    + local_id
                )
        elif "omim" in prefix:
            if local_id:
                link = "https://www.omim.org/entry/" + local_id
        elif "mondo" in prefix:
            link = "https://monarchinitiative.org/disease/" + code.upper()

        if link:
            title += f" <a href='{link}'>({code})</a>"
        else:
            # Unknown source or malformed id: show the code without a link.
            title += f" ({code})"
    return title


In [27]:
def get_figure_object(plt_data):
    """Assemble the Plotly figure: edge lines, remaining-node markers, highlighted-node markers.

    `plt_data` is the dict produced by ``prepare_data``. The figure is
    1000x600 with a legend, closest-point hover and both axes hidden.
    """
    traces = [
        scatter_edge_trace(plt_data),
        scatter_pred_trace(plt_data),
        scatter_terms_trace(plt_data),
    ]
    hidden_axis = {"showgrid": False, "zeroline": False, "showticklabels": False}
    layout = go.Layout(
        width=1000,
        height=600,
        showlegend=True,
        hovermode="closest",
        margin={"b": 20, "l": 5, "r": 5, "t": 40},
        xaxis=dict(hidden_axis),
        yaxis=dict(hidden_axis),
    )
    return go.Figure(data=traces, layout=layout)


In [28]:
def plot_disease(G, patient, disease, name: str = "", code: str = ""):
    """Plot a patient's terms together with all of their ancestors in `G`.

    Parameters
    ----------
    G : directed graph with edges pointing parent -> child.
    patient : dict with a ``"phenotype"`` entry whose keys are term ids.
    disease : mapping that may carry ``"name"`` and ``"id"``, used for the
        title. A non-mapping value contributes nothing to the title.
    name, code : optional overrides for the disease name / id shown in the
        title; the empty defaults leave the values from `disease` untouched.

    Returns
    -------
    The Plotly figure. It is also displayed via ``fig.show()``.

    Note: term labels are read from the notebook-global ontology ``hp``.
    """
    from collections.abc import Mapping

    patient_set = set(patient["phenotype"])
    # 1000 is far deeper than the hierarchy, i.e. "all ancestors".
    ancestors = predecessors(G, list(patient_set), 1000)
    hpo_set = patient_set.union(ancestors)
    # Drop the ontology root so the plot is not dominated by a single hub;
    # discard() is a no-op when the root is absent.
    hpo_set.discard("HP:0000001")
    subgraph = G.subgraph(list(hpo_set))

    plt_data = prepare_data(subgraph, hp, patient_set)

    fig = get_figure_object(plt_data)

    title_info = dict(disease) if isinstance(disease, Mapping) else {}
    if name:
        title_info["name"] = name
    if code:
        title_info["id"] = code
    title = get_title(title_info)

    # layout.titlefont is deprecated; layout.title.font is the supported path.
    fig.update_layout(title_text=title, title_font_size=14)
    fig.show()
    return fig


In [29]:
# Only "name" and "id" are read when building the figure title;
# "phenotype" is carried along for reference.
disease = {
    "name": patients["ORPHA:324"]["name"],
    "id": patients["ORPHA:324"]["id"],
    "phenotype": patients["ORPHA:324"]["phenotype"],
}

# plot_disease() already calls fig.show(); the trailing semicolon keeps the notebook
# from rendering the returned figure a second time as the cell's output.
plot_disease(G, patient, disease);

In [31]:
# Same plot attempted through the library's own PhenotypicComparison helper.
# NOTE(review): PatientSampler is already imported in the imports cell; only
# PhenotypicComparison is new here.
from rarecrowds import PatientSampler
from rarecrowds import PhenotypicComparison

# Draws a fresh sample, so `sampler` and `patients` from the earlier cell are replaced.
sampler = PatientSampler()
patients = sampler.sample(patient_params="default", N=5)
# NOTE(review): in the recorded run this call fails with an AssertionError:
# Graphviz `dot` reports "syntax error in line 2 near ':'".
# Presumably the ':' in the term ids reaches dot unescaped; prepare_data()
# above relabels ':' to '_' before the layout for this reason. TODO confirm.
fig = PhenotypicComparison(patient = patients['ORPHA:324']['cohort'][0]['phenotype'])

"dot" with args ['-Tdot', '/tmp/tmp1vmj6ntp'] returned code: 1

stdout, stderr:
 b''
b"Error: /tmp/tmp1vmj6ntp: syntax error in line 2 near ':'\n"



AssertionError: ignored