# Workflow 1, Module 1 (condition similarity)

This notebook looks at the inverse of Workflow 1, Module 1 (WF1 Mod1).  In that module, we start with a common disease and look for rare diseases that are phenotypically similar.  Here, we start with a rare disease and look for common diseases that are phenotypically similar to it.

In [2]:
import requests
import pandas as pd

def quick(question,max_connectivity=None):
    """POST a question to the ROBOKOP "quick" endpoint and return the reply.

    Parameters
    ----------
    question : dict
        JSON-serialisable question; sent as the request body.
    max_connectivity : optional
        When not None, appended to the URL as the ``max_connectivity``
        query parameter.

    Returns
    -------
    The decoded JSON body when the status code is 200; otherwise the raw
    response object, so the caller can inspect the failure.

    The request URL and the return status are printed as a side effect.
    """
    endpoint = 'http://robokop.renci.org:80/api/simple/quick'
    query = '' if max_connectivity is None else f'?max_connectivity={max_connectivity}'
    url = endpoint + query
    print(url)

    response = requests.post(url, json=question)
    print( f"Return Status: {response.status_code}" )

    # Only a 200 reply is decoded; anything else is handed back untouched.
    return response.json() if response.status_code == 200 else response

The basic machine question created below goes from a disease to a set of phenotypes to a second disease.  Marking the phenotype node as a set allows many phenotypes to connect the two diseases.

In [3]:
def create_basic_question(disease_id):
    """Build the disease -> phenotypes -> disease machine question.

    Parameters
    ----------
    disease_id : str
        Curie pinned on the starting disease node ``n0``.

    Returns
    -------
    dict
        ``{"machine_question": {"nodes": [...], "edges": [...]}}`` with three
        nodes (``n0`` fixed disease, ``n1`` phenotypic_feature marked as a
        set, ``n2`` unconstrained disease) joined by edges ``e0`` (n0 -> n1)
        and ``e1`` (n1 -> n2).
    """
    start_disease = {"id": "n0", "type": "disease", "curie": disease_id}
    shared_phenotypes = {"id": "n1", "type": "phenotypic_feature", "set": True}
    other_disease = {"id": "n2", "type": "disease"}

    # (edge id, source node, target node) for the single linear path.
    hops = [("e0", "n0", "n1"), ("e1", "n1", "n2")]
    edges = [
        {"id": edge_id, "source_id": source, "target_id": target}
        for edge_id, source, target in hops
    ]

    return {
        "machine_question": {
            "nodes": [start_disease, shared_phenotypes, other_disease],
            "edges": edges,
        }
    }

In [20]:
def parse_answer(returnanswer):
    """Flatten a quick-API response into one row per answer.

    Parameters
    ----------
    returnanswer : dict
        Response with two parts that are read here:
        ``returnanswer['knowledge_graph']['nodes']`` (dicts with an ``'id'``
        and optionally ``'name'`` and ``'rare disease'``) and
        ``returnanswer['answers']`` (dicts with ``'node_bindings'['n2']`` and
        ``'score'``).

    Returns
    -------
    pandas.DataFrame
        Columns ``condition_id``, ``condition_name``, ``rare``, ``score``, in
        the order of ``returnanswer['answers']``. The columns are present
        even when there are no answers, so callers can always filter on
        ``rare``.

    Raises
    ------
    KeyError
        If an answer binds ``n2`` to an id missing from the knowledge graph.
    """
    columns = ["condition_id", "condition_name", "rare", "score"]
    nodes_by_id = {n['id']: n for n in returnanswer['knowledge_graph']['nodes']}

    rows = []
    for answer in returnanswer['answers']:
        condition_id = answer['node_bindings']['n2']
        node = nodes_by_id[condition_id]
        rows.append({
            "condition_id": condition_id,
            # Nodes without a name are labelled by their id instead of raising.
            "condition_name": node.get('name', condition_id),
            # A node with no 'rare disease' property is treated as not rare.
            "rare": node.get('rare disease', False),
            "score": answer['score'],
        })

    # Explicit columns keep the schema stable for an empty answer list.
    return pd.DataFrame(rows, columns=columns)

In [5]:
# Each entry is a (display label, MONDO curie) pair for one query disease.
CS, UV, T, XP = (
    ('Cockayne syndrome', 'MONDO:0016006'),
    ('UV_sensitive_syndrome', 'MONDO:0015797'),
    ('Triciothiodystrophy', 'MONDO:0011125'),
    ('Xeroderma_pigmentosum', 'MONDO:0019600'),
)

# Order here is the order in which results are queried and displayed.
rare_diseases = [CS, UV, T, XP]

In [10]:
# Run the basic (phenotype-only) question once per rare disease.
# Responses are stored under the disease curie; the display label (`name`)
# is not used in this loop.
# NOTE: quick() hands back the raw response object on a non-200 status, so an
# entry is parsed JSON only when that request succeeded.
answers = {}
for name,disease in rare_diseases:
    q = create_basic_question(disease)
    answers[disease] = quick(q,max_connectivity = 1000)

http://robokop.renci.org:80/api/simple/quick?max_connectivity=1000
Return Status: 200
http://robokop.renci.org:80/api/simple/quick?max_connectivity=1000
Return Status: 200
http://robokop.renci.org:80/api/simple/quick?max_connectivity=1000
Return Status: 200
http://robokop.renci.org:80/api/simple/quick?max_connectivity=1000
Return Status: 200


In [28]:
from IPython.display import display

# Parse every response and keep only the rows whose 'rare' flag is False,
# keyed by the same disease curie used in `answers`.
frames = {}
for d in answers:
    f = parse_answer(answers[d])
    frames[d] = f.loc[f['rare'] == False]

# Show the first five remaining rows for each query disease, in list order.
for dname, did in rare_diseases:
    print(dname)
    display(frames[did].head(5))

Cockayne syndrome


Unnamed: 0,condition_id,condition_name,rare,score
2,MONDO:0021231,retina neoplasm,False,25.114838
4,MONDO:0015333,progeroid syndrome,False,24.673287
5,MONDO:0021190,DNA repair disease,False,24.655713
6,MONDO:0006573,lipodystrophy (disease),False,14.829836
10,MONDO:0003608,optic atrophy,False,13.623599


UV_sensitive_syndrome


Unnamed: 0,condition_id,condition_name,rare,score
3,MONDO:0021190,DNA repair disease,False,0.38999
10,MONDO:0022687,cerebellar degeneration,False,0.327187
11,MONDO:0021231,retina neoplasm,False,0.327187
19,MONDO:0002913,cerebellar neoplasm,False,0.260326
23,MONDO:0015333,progeroid syndrome,False,0.260326


Triciothiodystrophy


Unnamed: 0,condition_id,condition_name,rare,score
1,MONDO:0006590,palmoplantar keratosis,False,4.6843
2,MONDO:0006572,lichen planus,False,4.153256
4,MONDO:0006566,keratosis,False,4.007624
8,MONDO:0006541,epidermolysis bullosa,False,3.707642
10,MONDO:0003799,conjunctivitis (disease),False,3.642809


Xeroderma_pigmentosum


Unnamed: 0,condition_id,condition_name,rare,score
1,MONDO:0002913,cerebellar neoplasm,False,19.357747
2,MONDO:0022687,cerebellar degeneration,False,14.301886
9,MONDO:0015333,progeroid syndrome,False,13.562385
13,MONDO:0021190,DNA repair disease,False,13.006107
16,MONDO:0002236,ocular cancer,False,12.447572


We can potentially sharpen some of these answers if we allow our searches to include conditions that are similar by both phenotype and biological process.  That is, we want to allow two paths connecting the input to the output, saying that they should be similar phenotypically, but also similar in terms of the processes that create the disease:

In [29]:
def create_complex_question(disease_id):
    """Build a two-path machine question linking a disease to other diseases.

    The pinned disease ``n0`` and the unconstrained disease ``n2`` must be
    connected both through phenotypic features (``n1``) and through
    biological processes or activities (``n3``); both intermediate nodes are
    marked as sets.

    Parameters
    ----------
    disease_id : str
        Curie pinned on the starting disease node ``n0``.

    Returns
    -------
    dict
        ``{"machine_question": {"nodes": [...], "edges": [...]}}`` with nodes
        ``n0``..``n3`` and edges ``e0`` (n0 -> n1), ``e1`` (n1 -> n2),
        ``e2`` (n0 -> n3), ``e3`` (n3 -> n2).
    """
    start_disease = {"id": "n0", "type": "disease", "curie": disease_id}
    shared_phenotypes = {"id": "n1", "type": "phenotypic_feature", "set": True}
    other_disease = {"id": "n2", "type": "disease"}
    shared_processes = {"id": "n3", "type": "biological_process_or_activity", "set": True}

    # (edge id, source node, target node): phenotype path first, then process path.
    hops = [
        ("e0", "n0", "n1"),
        ("e1", "n1", "n2"),
        ("e2", "n0", "n3"),
        ("e3", "n3", "n2"),
    ]
    edges = [
        {"id": edge_id, "source_id": source, "target_id": target}
        for edge_id, source, target in hops
    ]

    return {
        "machine_question": {
            "nodes": [start_disease, shared_phenotypes, other_disease, shared_processes],
            "edges": edges,
        }
    }

In [30]:
# Run the complex (phenotype + biological process) question once per rare
# disease. This rebinds `answers`, replacing any earlier results held under
# that name; responses are again keyed by disease curie.
# NOTE: quick() hands back the raw response object on a non-200 status, so an
# entry is parsed JSON only when that request succeeded.
answers = {}
for name,disease in rare_diseases:
    q = create_complex_question(disease)
    answers[disease] = quick(q,max_connectivity = 1000)

http://robokop.renci.org:80/api/simple/quick?max_connectivity=1000
Return Status: 200
http://robokop.renci.org:80/api/simple/quick?max_connectivity=1000
Return Status: 200
http://robokop.renci.org:80/api/simple/quick?max_connectivity=1000
Return Status: 200
http://robokop.renci.org:80/api/simple/quick?max_connectivity=1000
Return Status: 200


In [31]:
from IPython.display import display

# Parse every response currently held in `answers` and keep only the rows
# whose 'rare' flag is False, keyed by disease curie.
frames = {}
for d in answers:
    f = parse_answer(answers[d])
    frames[d] = f.loc[f['rare'] == False]

# Show the first five remaining rows for each query disease, in list order.
for dname, did in rare_diseases:
    print(dname)
    display(frames[did].head(5))

Cockayne syndrome


Unnamed: 0,condition_id,condition_name,rare,score
0,MONDO:0021190,DNA repair disease,False,0
2,MONDO:0015333,progeroid syndrome,False,0


UV_sensitive_syndrome


Unnamed: 0,condition_id,condition_name,rare,score
9,MONDO:0021190,DNA repair disease,False,0


Triciothiodystrophy


Unnamed: 0,condition_id,condition_name,rare,score
5,MONDO:0021190,DNA repair disease,False,0


Xeroderma_pigmentosum


Unnamed: 0,condition_id,condition_name,rare,score
9,MONDO:0021190,DNA repair disease,False,0
13,MONDO:0015333,progeroid syndrome,False,0
