# Workflow 1, Module 3a

Given a disease, first find chemical agents that make the disease worse. Then look for genes that decrease the amount of or response to the toxicant, and try to increase the amount or action of those genes.

In [3]:
input_disease = 'MONDO:0005148'  #Type 2 diabetes

## Functions

In [4]:
import requests
import pandas as pd

robokop_server = 'robokop.renci.org'

def quick(question):
    url=f'http://{robokop_server}:80/api/simple/quick/'
    response = requests.post(url,json=question)
    print( f"Return Status: {response.status_code}" )
    if response.status_code == 200:
        return response.json()
    return response

In [5]:
def make_N_step_question(types,curies,props,forwards):
    question = {
                'machine_question': {
                    'nodes': [],
                    'edges': []
                }
            }
    for i,t in enumerate(types):
        newnode = {'id': i, 'type': t}
        if curies[i] is not None:
            newnode['curie'] = curies[i]
        question['machine_question']['nodes'].append(newnode)
        if i > 0:
            if forwards[i-1]:
                edge = {'source_id': i-1, 'target_id': i}
            else:
                edge = {'source_id': i, 'target_id': i-1}
            if props[i-1] is not None:
                edge['type'] = props[i-1]
            question['machine_question']['edges'].append( edge )
    return question

In [6]:
def merge_answers(answerdict):
    """Take a dictionary of answer lists and return a single sorted answer list."""
    final_answer = {'misc_info': {'natural_question': None, 'num_total_paths': 0}, 'answers': []}
    for k,v in answerdict.items():
        if 'filename' in v: 
            final_answer['filename'] = v['filename']
        if 'timestamp' in v:
            final_answer['timestamp'] = v['timestamp']
        if 'misc_info' in v:
            final_answer['misc_info']['num_total_paths'] += v['misc_info']['num_total_paths']
        if 'answers' in v:
            final_answer['answers'].extend(v['answers'])
    final_answer['answers'].sort(key = lambda a: -a['score'])

    return final_answer

In [7]:
import pandas as pd
def answers2frame(graph_answers,properties_a,properties_b):
    answers = []
    for graph_answer in graph_answers['answers']:
        try:
            #The return nodes don't necessarily come out in order. We can recognize the disease and the gene
            # by type, but we have to get the original toxicant by finding which node has the correct id.
            for edge in graph_answer['edges']:
                if edge['type']=='contributes_to':
                    toxicant_id = edge['source_id']
                elif edge['type'] in properties_a:
                    gene_tox = edge
                elif edge['type'] in properties_b:
                    chem_gene = edge
            for node in graph_answer['nodes']:
                if node['type'] == 'gene':
                    gene = node
                elif node['type'] == 'chemical_substance':
                    if node['id'] == toxicant_id:
                        toxicant=node
                    else:
                        drug=node
            ans = { 'score': graph_answer['score'],
                    'toxicant': toxicant['name'] if 'name' in toxicant else toxicant['id'],
                    'gene': gene['name'],
                    'chemical': drug['name'] if 'name' in drug else drug['id'],
                    'gene->toxicant': gene_tox['type'],
                    'chem->gene': chem_gene['type']}
        except IndexError:
            #this happens if chemical and toxicant are the same thing
            continue
        answers.append(ans)
    df = pd.DataFrame(answers)
    ordered_columns = ['score','chemical','chem->gene','gene','gene->toxicant','toxicant']
    df = df[ordered_columns]
    return df

## Set up the property lists

There are several properties at each step that are acceptable for our purposes. 

In [8]:
properties_a=['increases_degradation_of',
              'decreases_abundance_of',
              'decreases_response_to']
properties_b=['increases_activity_of',
              'increases_expression_of',
              'decreases_degradation_of',
              'increases_stability_of',
              'increases_synthesis_of',
              'increases_secretion_of']
types = ['disease',
        'chemical_substance',
        'gene',
        'chemical_substance']
curies = [input_disease,None,None,None]
forwards = [False,False,False]

## Loop over property pairs, calling quick for each pair

In [9]:
all_answers = {}
for prop_a in properties_a:
    for prop_b in properties_b:
        question = make_N_step_question(types,curies,['contributes_to',prop_a,prop_b],forwards)
        all_answers[ (prop_a,prop_b) ] = quick(question)

Return Status: 200
Return Status: 200
Return Status: 500
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200
Return Status: 200


In [10]:
total_answers = merge_answers(all_answers)

In [11]:
from IPython.display import display
df = answers2frame(total_answers,properties_a,properties_b)
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(df)

Unnamed: 0,score,chemical,chem->gene,gene,gene->toxicant,toxicant
0,5.892455,N-nitrosodiethylamine,increases_expression_of,BCL2,decreases_response_to,cadmium atom
1,3.05487,N-nitrosodiethylamine,increases_expression_of,BTG2,decreases_response_to,N-nitrosodiethylamine
2,2.758397,superoxide,increases_synthesis_of,APP,decreases_abundance_of,copper atom
3,2.221609,superoxide,increases_expression_of,ATP7A,decreases_abundance_of,iron atom
4,2.131269,glucose,increases_activity_of,INS,decreases_response_to,streptozocin
5,2.082391,glucose,increases_secretion_of,INS,decreases_response_to,streptozocin
6,1.920526,glyburide,increases_secretion_of,INS,decreases_response_to,streptozocin
7,1.864246,rosiglitazone,increases_activity_of,SOD2,decreases_response_to,streptozocin
8,1.852619,cholesterol,increases_activity_of,CETP,decreases_abundance_of,HDL cholesterol
9,1.836411,cholesterol,increases_activity_of,CETP,decreases_abundance_of,triglyceride


In [12]:
#import json
#print(json.dumps(total_answers['answers'][0],indent=4))

In [14]:
import json
with open('answer12.json','w') as outf:
    json.dump(total_answers['answers'][12],outf,indent=4)
with open('answer0.json','w') as outf:
    json.dump(total_answers['answers'][0],outf,indent=4)

The first few roles are essentially getting at the role of oxidative stress, which appears to be complex.  

All of the cholesterol rows are interesting because they have one form of cholesterol as a toxicant, and another as a chemical that reduces the toxicant.  This seems odd at first, but actually makes a lot of sense: biological systems that reduce a substance are often kicked into gear by the presence of that substance.  We should probably think about automatically removing these circular rows, or finding a way to make use of them.

The next few rows are fibrates, which are cholesterol lowering drugs, often given to diabetics.

Finally, at #16, we have metformin.  Here, it's appearing because it increases catalase activity.  This has been noted in the literature as a way that metformin can act by reducing oxidative stress.

### Rare Diseases

The above was all carried out for type 2 diabetes, a common disease.  Here we try to run the same thing but for rare forms of diabetes (as found in WF1Mod1_Ontology.ipynb). We're going to try a lot, because the data is sparse for these diseases, and we will see that most of them do not return any results.

In [10]:
rare_diseases = ['MONDO:0014785',
'MONDO:0011986',
'MONDO:0012819',
'MONDO:0012513',
'MONDO:0014458',
'MONDO:0012970',
'MONDO:0013478',
'MONDO:0013242',
'MONDO:0009575',
'MONDO:0016391',
'MONDO:0010802',
'MONDO:0016422',
'MONDO:0012971',
'MONDO:0011955',
'MONDO:0015308',
'MONDO:0014991',
'MONDO:0009517',
'MONDO:0013647',
'MONDO:0011363',
'MONDO:0014523',
'MONDO:0011386',
'MONDO:0017230',
'MONDO:0008763',
'MONDO:0012966',
'MONDO:0012436',
'MONDO:0012348',
'MONDO:0012520',
'MONDO:0007455',
'MONDO:0008812',
'MONDO:0020569',
'MONDO:0018105',
'MONDO:0012818',
'MONDO:0007669',
'MONDO:0014686',
'MONDO:0007452',
'MONDO:0013225',
'MONDO:0007453',
'MONDO:0012480',
'MONDO:0009874',
'MONDO:0013240',
'MONDO:0010020',
'MONDO:0012962',
'MONDO:0012923',
'MONDO:0011502',
'MONDO:0010800',
'MONDO:0014589',
'MONDO:0012963',
'MONDO:0008696',
'MONDO:0011027',
'MONDO:0011273',
'MONDO:0013673',
'MONDO:0018581',
'MONDO:0018911',
'MONDO:0011073',
'MONDO:0010773',
'MONDO:0011667',
'MONDO:0014488',
'MONDO:0011668',
'MONDO:0012192',
'MONDO:0020525',
'MONDO:0019207',
'MONDO:0010785',
'MONDO:0009192',
'MONDO:0018320',
'MONDO:0015967',
'MONDO:0014674',
'MONDO:0000208',
'MONDO:0018625',
'MONDO:0008185',
'MONDO:0009419',
'MONDO:0010894',
'MONDO:0011643',
'MONDO:0012071',
'MONDO:0009101',
'MONDO:0000065',
'MONDO:0018575',
'MONDO:0012969',
'MONDO:0014497',
'MONDO:0018629',
'MONDO:0018883',
'MONDO:0012522',
'MONDO:0010026',
'MONDO:0011072',
'MONDO:0008491']

In [11]:
def do_one(did):
    curies = [did,None,None,None]
    all_answers = {}
    for prop_a in properties_a:
        for prop_b in properties_b:
            question = make_N_step_question(types,curies,['contributes_to',prop_a,prop_b],forwards)
            all_answers[ (prop_a,prop_b) ] = quick(question)
    return merge_answers(all_answers)

In [12]:
outputs = {}
for rare_id in rare_diseases:
    rare_answer = do_one(rare_id)
    print(rare_id,rare_answer['misc_info']['num_total_paths'])
    if rare_answer['misc_info']['num_total_paths'] > 0:
        outputs[rare_id] = rare_answer

Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
MONDO:0014785 0
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
MONDO:0011986 0
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 500
Return Status: 200
Return Status: 200
Return Status: 200

In [13]:
outputs.keys()

dict_keys(['MONDO:0012819'])