# Workflow 1, Module 3, Question 2

## What proteins produce agent [x]?

Let's run Q1 and use the endogenous output of that.

### Expand service

In [1]:
# Host name of the server queried by `expand`, which interpolates it into the request URL.
robokop_server = 'robokop.renci.org'

In [2]:
import requests
import pandas as pd

In [3]:
def expand(type1,identifier,type2,rebuild=None,csv=None,predicate=None,timeout=None):
    """Call the simple "expand" endpoint for one node and return the decoded reply.

    Parameters
    ----------
    type1 : str
        Node type of the starting identifier (e.g. ``'gene'``).
    identifier : str
        Identifier of the starting node (e.g. ``'HGNC:3585'``).
    type2 : str
        Node type to expand to (e.g. ``'chemical_substance'``).
    rebuild, csv, predicate : optional
        Forwarded to the service as query parameters. Any of them left as
        ``None`` is omitted from the request entirely.
    timeout : float or tuple, optional
        Passed straight to ``requests.get``. The default ``None`` waits
        indefinitely; pass a number of seconds to bound the wait.

    Returns
    -------
    The decoded JSON body when the HTTP status is 200, otherwise an empty list.
    A 200 reply is not necessarily a successful answer, so callers should still
    check for the ``'answers'`` key before parsing.
    """
    url = f'http://{robokop_server}:80/api/simple/expand/{type1}/{identifier}/{type2}'
    candidate_params = {'rebuild': rebuild,
                        'csv'    : csv,
                        'predicate': predicate}
    # Only send the options the caller actually set.
    params = {key: value for key, value in candidate_params.items() if value is not None}
    response = requests.get(url, params=params, timeout=timeout)
    print( f'Return Status: {response.status_code}' )
    if response.status_code != 200:
        return []
    return response.json()

In [4]:
def parse_answer(returnanswer):
    """Flatten an expand-service reply into a DataFrame with one row per answer.

    For every entry of ``returnanswer['answers']`` the second node
    (``nodes[1]``) and the first edge (``edges[0]``) are used.

    Parameters
    ----------
    returnanswer : dict
        Decoded reply containing an ``'answers'`` list; each answer must have
        at least two ``'nodes'`` and one ``'edges'`` entry.

    Returns
    -------
    pandas.DataFrame
        Columns ``result_id``, ``result_name``, ``type`` and ``source``.
        ``result_name`` falls back to the node id when the node has no
        ``'name'`` key. The columns are present even when there are no
        answers, so callers can filter on ``frame['type']`` safely.
    """
    columns = ["result_id", "result_name", "type", "source"]
    rows = []
    for answer in returnanswer['answers']:
        node = answer['nodes'][1]
        edge = answer['edges'][0]
        rows.append({"result_id": node["id"],
                     "result_name": node["name"] if 'name' in node else node['id'],
                     "type": edge["type"],
                     "source": edge['edge_source']})
    # Passing the columns explicitly keeps the schema stable for an empty reply.
    return pd.DataFrame(rows, columns=columns)

### Question 2

In [8]:
# Expand the FANCD2 gene to the chemical substances linked to it.
FANCD2 = 'HGNC:3585'
all_results = expand(type1='gene', identifier=FANCD2,
                     type2='chemical_substance', rebuild=True)

Return Status: 200


In [30]:

# Same expansion as above, this time starting from the CASP3 gene.
CASP3 = 'HGNC:1504'
hmm = expand(type1='gene', identifier=CASP3,
             type2='chemical_substance', rebuild=True)

Return Status: 200


In [31]:
# Flatten the raw CASP3 reply into a table and display it.
caspframe = parse_answer(hmm)
caspframe
# Alternative view: caspframe['type'].value_counts() summarises the edge types.

Unnamed: 0,result_id,result_name,source,type
0,CHEBI:28834,CHEBI:28834,ctd.drug_to_gene,affects_response_to
1,CHEBI:45713,resveratrol,ctd.gene_to_drug_extended,decreases_expression_of
2,PUBCHEM:58886963,arsenic trioxide,ctd.gene_to_drug_extended,decreases_expression_of
3,CHEBI:29678,sodium arsenite,ctd.gene_to_drug_extended,decreases_expression_of
4,CHEBI:3962,Curcumin,ctd.gene_to_drug_extended,decreases_expression_of
5,CHEBI:16243,Quercetin,ctd.gene_to_drug_extended,decreases_expression_of
6,CHEBI:15368,Acrolein,ctd.gene_to_drug_extended,decreases_expression_of
7,CHEBI:41423,Celecoxib,ctd.gene_to_drug_extended,decreases_expression_of
8,CHEBI:16469,Estradiol,ctd.gene_to_drug_extended,decreases_expression_of
9,CHEBI:50924,sorafenib,ctd.gene_to_drug_extended,decreases_expression_of


In [32]:
# Edge predicates to request one at a time for CASP3.
preds = [
    'increases_expression_of',
    'increases_abundance_of',
    'increases_activity_of',
    'increases_stability_of',
    'decreases_degradation_of',
    'affects_metabolic_processing',
    'affects_splicing_of',
    'affects_transport_of',
    'increases_response_to',
    'affects_molecular_modification_of',
]

# Raw service reply for each predicate, keyed by predicate name.
results = {}
for p in preds:
    print(p)
    results[p] = expand(type1='gene', identifier=CASP3,
                        type2='chemical_substance', predicate=p)

increases_expression_of
Return Status: 200
increases_abundance_of
Return Status: 200
increases_activity_of
Return Status: 200
increases_stability_of
Return Status: 200
decreases_degradation_of
Return Status: 200
affects_metabolic_processing
Return Status: 200
affects_splicing_of
Return Status: 200
affects_transport_of
Return Status: 200
increases_response_to
Return Status: 200
affects_molecular_modification_of
Return Status: 200


In [33]:
# Build one table per predicate that returned answers, then stack them.
r = []
for p in preds:
    if 'answers' not in results[p]:
        # Replies without an 'answers' key have nothing to parse.
        continue
    frame = parse_answer(results[p])
    print(p, len(frame))
    frame['predicate'] = p  # remember which query produced these rows
    r.append(frame)
all_results = pd.concat(r)
all_results

increases_expression_of 213
increases_activity_of 684
decreases_degradation_of 7
increases_response_to 1
affects_molecular_modification_of 1


Unnamed: 0,result_id,result_name,source,type,predicate
0,CHEBI:16325,CHEBI:16325,ctd.drug_to_gene_extended,increases_expression_of,increases_expression_of
1,CHEBI:32063,CHEBI:32063,ctd.drug_to_gene_extended,increases_expression_of,increases_expression_of
2,CHEBI:15738,Staurosporine,ctd.gene_to_drug_extended,increases_expression_of,increases_expression_of
3,CHEBI:641,1-Methyl-4-phenylpyridinium,ctd.gene_to_drug_extended,increases_expression_of,increases_expression_of
4,CHEBI:88217,"N,N,N',N'-tetrakis(2-pyridylmethyl)ethylenedia...",ctd.gene_to_drug_extended,increases_expression_of,increases_expression_of
5,CHEBI:136004,linsidomine,ctd.gene_to_drug_extended,increases_expression_of,increases_expression_of
6,CHEBI:8633,puerarin,ctd.gene_to_drug_extended,increases_expression_of,increases_expression_of
7,CHEBI:68508,diethyl maleate,ctd.gene_to_drug_extended,increases_expression_of,increases_expression_of
8,CHEBI:28201,Rotenone,ctd.gene_to_drug_extended,increases_expression_of,increases_expression_of
9,CHEBI:28230,hesperetin,ctd.gene_to_drug_extended,increases_expression_of,increases_expression_of


Are there any predicates indicating a production?

In [10]:
# Count how many rows of `all_results` carry each edge type.
# NOTE(review): `all_results` is reassigned by several cells in this notebook,
# so the counts shown depend on which of those cells ran most recently.
all_results['type'].value_counts()

interacts_with                            64
decreases_expression_of                   48
increases_expression_of                   27
negatively_regulates__entity_to_entity    20
positively_regulates__entity_to_entity    12
directly_interacts_with                   11
affects_expression_of                      9
increases_activity_of                      6
increases_molecular_modification_of        5
decreases_activity_of                      4
increases_metabolic_processing_of          3
increases_synthesis_of                     3
affects_response_to                        2
decreases_molecular_modification_of        1
affects_metabolic_processing_of            1
increases_degradation_of                   1
affects_activity_of                        1
Name: type, dtype: int64

In [17]:
# Keep only the rows whose edge type is a positive regulation.
all_results.loc[all_results['type'].eq('positively_regulates__entity_to_entity')]

Unnamed: 0,result_id,result_name,source,type,Chemical
30,EC:1.14.13.179,EC:1.14.13.179,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:25858
115,EC:2.1.1.160,EC:2.1.1.160,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:27732


We're finding enzymes involved in creating the endogenous chemicals. But how come our genes are in EC format, rather than being translated to our normal HGNC?   It's because these are non-human enzymes and at the moment we're not synonymizing non-human genes. In fact, for the most part, we don't accept non-human genes, but we're not checking for that in the KEGG client.

In [11]:
# Keep only the rows whose edge type says synthesis is increased.
all_results.loc[all_results['type'].eq('increases_synthesis_of')]

Unnamed: 0,result_id,result_name,source,type,Chemical
1,HGNC:2610,CYP2A6,ctd.drug_to_gene,increases_synthesis_of,CHEBI:68449
14,HGNC:435,ALOX5,ctd.drug_to_gene,increases_synthesis_of,CHEBI:28209
0,HGNC:40,ABCB1,ctd.drug_to_gene,increases_synthesis_of,PUBCHEM:273429


So here's an example of a metabolite (`PUBCHEM:273429`) whose synthesis is increased by the action of a gene, `ABCB1`.

### Cystic Fibrosis

In [12]:
# Cystic fibrosis, expanded from the disease node to its chemical substances.
cf = 'MONDO:0009061'
# The node types are spelled out as literals so this cell runs on a fresh
# kernel; no notebook-level `type1`/`type2` variables are required.
# NOTE(review): 'disease' is presumed to be the service's type name for a
# MONDO identifier - confirm against the expand API.
result = expand('disease', cf, 'chemical_substance')
result_frame = parse_answer(result)
# Only the 'related_to' rows are kept as the endogenous set.
endogenous = result_frame[ result_frame['type'] == 'related_to' ]
endogenous

Return Status: 200


Unnamed: 0,result_id,result_name,source,type
4,CHEBI:31835,methylparaben,hmdb.metabolite_to_disease,related_to
5,CHEBI:16325,Lithocholic Acid,hmdb.metabolite_to_disease,related_to
6,CHEBI:30805,Dodecanoic acid,hmdb.metabolite_to_disease,related_to
7,CHEBI:30813,Capric acid,hmdb.metabolite_to_disease,related_to
8,CHEBI:17201,Glycyl-glycine,hmdb.metabolite_to_disease,related_to
10,CHEBI:28621,Triethanolamine,hmdb.metabolite_to_disease,related_to
12,CHEBI:16755,Chenodeoxycholic Acid,hmdb.metabolite_to_disease,related_to
13,CHEBI:28834,Deoxycholic Acid,hmdb.metabolite_to_disease,related_to
14,CHEBI:6650,Malic acid,hmdb.metabolite_to_disease,related_to
15,CHEBI:16359,Cholic Acid,hmdb.metabolite_to_disease,related_to


In [13]:
# Expand every endogenous chemical to its genes, keeping the raw reply per chemical.
identifiers = list(endogenous['result_id'])
genes = {}
for ident in identifiers:
    print(ident)
    genes[ident] = expand(type1='chemical_substance', identifier=ident,
                          type2='gene', rebuild=True)

CHEBI:31835
Return Status: 200
CHEBI:16325
Return Status: 200
CHEBI:30805
Return Status: 200
CHEBI:30813
Return Status: 200
CHEBI:17201
Return Status: 200
CHEBI:28621
Return Status: 200
CHEBI:16755
Return Status: 200
CHEBI:28834
Return Status: 200
CHEBI:6650
Return Status: 200
CHEBI:16359
Return Status: 200
CHEBI:16919
Return Status: 200
CHEBI:16737
Return Status: 200
CHEBI:17196
Return Status: 200
CHEBI:17712
Return Status: 200
CHEBI:8207
Return Status: 200
CHEBI:28911
Return Status: 200
CHEBI:32063
Return Status: 200
CHEBI:17243
Return Status: 200
CHEBI:49059
Return Status: 200
PUBCHEM:3952518
Return Status: 200
PUBCHEM:417360
Return Status: 200
PUBCHEM:23617285
Return Status: 200
CHEMBL:CHEMBL2004044
Return Status: 200


In [14]:
# Parse each chemical's reply, tag the rows with that chemical, and stack them.
results = []
for chemical in genes:
    if 'answers' not in genes[chemical]:
        # Replies without an 'answers' key have nothing to parse.
        continue
    frame = parse_answer(genes[chemical])
    frame['Chemical'] = chemical
    results.append(frame)
all_results = pd.concat(results)
all_results['type'].value_counts()

increases_expression_of                   203
interacts_with                            119
decreases_expression_of                    87
directly_interacts_with                    43
negatively_regulates__entity_to_entity     24
positively_regulates__entity_to_entity     22
affects_response_to                        20
increases_activity_of                      18
increases_molecular_modification_of        12
affects_expression_of                       9
decreases_activity_of                       8
increases_degradation_of                    5
increases_metabolic_processing_of           2
affects_molecular_modification_of           2
decreases_abundance_of                      2
increases_secretion_of                      2
affects_localization_of                     2
increases_localization_of                   1
decreases_molecular_modification_of         1
increases_synthesis_of                      1
increases_uptake_of                         1
decreases_metabolic_processing_of 

In [15]:
# Keep only the rows whose edge type is a positive regulation.
all_results.loc[all_results['type'].eq('positively_regulates__entity_to_entity')]

Unnamed: 0,result_id,result_name,source,type,Chemical
78,EC:2.8.3.25,EC:2.8.3.25,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:16325
9,EC:3.1.2.21,EC:3.1.2.21,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:30813
0,EC:3.5.2.13,EC:3.5.2.13,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:17201
118,EC:3.5.1.24,EC:3.5.1.24,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:16755
119,EC:6.2.1.7,EC:6.2.1.7,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:16755
61,EC:3.5.1.24,EC:3.5.1.24,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:28834
62,EC:2.8.3.25,EC:2.8.3.25,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:28834
16,HGNC:15919,ACOT8,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:16359
48,EC:3.5.1.24,EC:3.5.1.24,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:16359
14,EC:3.9.1.1,EC:3.9.1.1,kegg.chemical_get_enzyme,positively_regulates__entity_to_entity,CHEBI:16919


In [16]:
# Keep only the rows whose edge type says synthesis is increased.
all_results.loc[all_results['type'].eq('increases_synthesis_of')]

Unnamed: 0,result_id,result_name,source,type,Chemical
13,HGNC:2768,DEK,ctd.drug_to_gene,increases_synthesis_of,CHEBI:16919


In [17]:
# Ask for a single predicate on one chemical.
# NOTE(review): `ident` is whatever the earlier `for ident in identifiers`
# loop left behind, i.e. the last identifier it visited.
expand(type1='chemical_substance', identifier=ident,
       type2='gene', predicate='increases_degradation_of')

Return Status: 200


{'message': 'Internal Server Error'}