# MELODI Presto API Example Usage

In [None]:
import json
import pandas as pd
import requests

## Configure parameters

In [None]:
# Base URL of the MELODI Presto REST API (note the trailing slash).
API_URL = "https://melodi-presto.mrcieu.ac.uk/api/"

# Service status check. The trailing slash of API_URL is stripped before the
# path is appended so the request goes to ".../api/status", not ".../api//status".
requests.get(f"{API_URL.rstrip('/')}/status").json()

True

## Create functions for the endpoints

Each of the endpoints can be wrapped up in a function:

In [None]:
#enrich
def enrich(q):
    """Query the ``/enrich/`` endpoint and return the response as a DataFrame.

    Parameters
    ----------
    q
        Search term; sent as the ``query`` field of the JSON request body.

    Returns
    -------
    pandas.DataFrame
        The JSON response flattened with ``pd.json_normalize``. If the
        response body cannot be decoded as JSON or cannot be normalised,
        ``No data`` is printed and an empty DataFrame is returned, so callers
        can always rely on DataFrame attributes such as ``.shape``.
    """
    endpoint = "/enrich/"
    # API_URL may end with "/"; strip it so the URL contains no "//" before
    # the endpoint path.
    url = f"{API_URL.rstrip('/')}{endpoint}"
    params = {
        "query": q,
    }
    response = requests.post(url, data=json.dumps(params))
    try:
        return pd.json_normalize(response.json())
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate.
        print('No data')
        return pd.DataFrame()
    
#overlap
def overlap(q1,q2):
    """Query the ``/overlap/`` endpoint for two sets of query terms.

    Parameters
    ----------
    q1, q2
        Query terms sent as the ``x`` and ``y`` fields of the JSON request
        body (the notebook passes lists of strings).

    Returns
    -------
    pandas.DataFrame
        ``pd.json_normalize`` of the ``data`` entry of the JSON response, or
        an empty DataFrame when the response has no ``data`` entry.
    """
    endpoint = "/overlap/"
    # API_URL may end with "/"; strip it so the URL contains no "//" before
    # the endpoint path.
    url = f"{API_URL.rstrip('/')}{endpoint}"
    params = {
        "x": q1,
        "y": q2,
    }
    response = requests.post(url, data=json.dumps(params))
    res = response.json()
    # Only a JSON object can carry a "data" key; the isinstance guard keeps a
    # string payload from matching by substring and a null payload from
    # raising TypeError.
    if isinstance(res, dict) and 'data' in res:
        return pd.json_normalize(res['data'])
    return pd.DataFrame()

def sentence(q):
    """Query the ``/sentence/`` endpoint for a single PubMed ID.

    Parameters
    ----------
    q
        PubMed ID; sent as the ``pmid`` field of the JSON request body.

    Returns
    -------
    pandas.DataFrame
        ``pd.json_normalize`` of the ``data`` entry of the JSON response, or
        an empty DataFrame when the response has no ``data`` entry.
    """
    endpoint = "/sentence/"
    # API_URL may end with "/"; strip it so the URL contains no "//" before
    # the endpoint path.
    url = f"{API_URL.rstrip('/')}{endpoint}"
    params = {
        # Use the function argument; the previous code read a notebook-level
        # ``pmid`` variable and ignored ``q`` entirely.
        "pmid": q,
    }
    response = requests.post(url, data=json.dumps(params))
    res = response.json()
    if isinstance(res, dict) and 'data' in res:
        return pd.json_normalize(res['data'])
    return pd.DataFrame()

#### Some extra functions

It may also be useful to filter the overlap records to exclude any results derived from the same publication

In [None]:
def pub_check(row):
    """Return True when the x and y sides of a row share no PubMed ID.

    ``row`` is indexed with ``'pmids_x'`` and ``'pmids_y'``; each value is a
    whitespace-separated string of PubMed IDs. Returns False as soon as the
    two sides have at least one ID in common.
    """
    # split() with no argument ignores leading/trailing/repeated whitespace,
    # so empty-string tokens can never produce a spurious match.
    px = set(row['pmids_x'].split())
    py = set(row['pmids_y'].split())
    return px.isdisjoint(py)

### Get enriched SemMedDB objects for a given search term

Starting from a PubMed search, return all SemMedDB subject-predicate-object triples, their enrichment metrics and publication information.

In [None]:
# Search term to enrich; an alternative example term is kept commented out.
# query_term='chronic kidney disease'
query_term='atopic dermatitis'
# Send the term to the enrich endpoint; the last line displays the result.
enrich_df = enrich(query_term)
enrich_df

Unnamed: 0,query,triple,subject_name,subject_type,subject_id,predicate,object_name,object_type,object_id,localCount,localTotal,globalCount,globalTotal,odds,pval,pmids
0,atopic_dermatitis,Topical form corticosteroids:TREATS:Dermatitis...,Topical form corticosteroids,phsu,C0304604,TREATS,"Dermatitis, Atopic",dsyn,C0011615,208,24841,210,8040465,323.293189739664,0.0,33837558 33511682 33314940 33298043 33170543 3...
1,atopic_dermatitis,"dupilumab:TREATS:Dermatitis, Atopic",dupilumab,phsu,C3660996,TREATS,"Dermatitis, Atopic",dsyn,C0011615,203,24841,210,8040465,315.45768731228185,0.0,33894325 33889195 33866905 33851078 33844308 3...
2,atopic_dermatitis,"Cyclosporine:TREATS:Dermatitis, Atopic",Cyclosporine,phsu,C0010592,TREATS,"Dermatitis, Atopic",dsyn,C0011615,191,24841,194,8040465,321.1333433010602,0.0,31640767 31566785 31291386 31245879 31099402 3...
3,atopic_dermatitis,"IGHE:ASSOCIATED_WITH:Dermatitis, Atopic",IGHE,gngm,3497,ASSOCIATED_WITH,"Dermatitis, Atopic",dsyn,C0011615,169,24841,173,8040465,318.3523546853797,5e-324,33645549 32068307 31218782 30937128 29528575 2...
4,atopic_dermatitis,"Tacrolimus:TREATS:Dermatitis, Atopic",Tacrolimus,phsu,C0085149,TREATS,"Dermatitis, Atopic",dsyn,C0011615,122,24841,123,8040465,322.6252423582531,5.885335769260758e-235,31483891 30877873 30851973 30742708 30478741 2...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2860,atopic_dermatitis,interleukin-22:ASSOCIATED_WITH:Nasal Polyps,interleukin-22,aapp,C0961814,ASSOCIATED_WITH,Nasal Polyps,dsyn,C0027430,2,24841,2,8040465,323.7031684045251,5.668202001922647e-05,32935490
2861,atopic_dermatitis,"IL17A protein, human|IL17A:STIMULATES:CCL20 ge...","IL17A protein, human|IL17A",aapp,C1701790|3605,STIMULATES,CCL20 gene|CCL20,gngm,C1366524|6364,2,24841,5,8040465,129.48121905068643,0.0001971685569559836,33509657 33253386
2862,atopic_dermatitis,Adipokines:CAUSES:Dermatologic disorders,Adipokines,aapp,C1955907,CAUSES,Dermatologic disorders,dsyn,C0037274,2,24841,2,8040465,323.7031684045251,5.668202001922647e-05,33260746
2863,atopic_dermatitis,High Density Lipoproteins:ASSOCIATED_WITH:Derm...,High Density Lipoproteins,aapp,C0023821,ASSOCIATED_WITH,Dermatologic disorders,dsyn,C0037274,2,24841,2,8040465,323.7031684045251,5.668202001922647e-05,33271807


In [None]:
# Size of the enrichment result as (rows, columns).
enrich_df.shape

(2865, 16)

In [None]:
# Preview the first two entries of the `triple` column.
print(enrich_df['triple'].head(2))

0    Topical form corticosteroids:TREATS:Dermatitis...
1                  dupilumab:TREATS:Dermatitis, Atopic
Name: triple, dtype: object


### Overlapping terms

We can provide two lists of query terms and identify overlapping data

In [None]:
# Query terms for the two sides of the overlap; alternative example term
# lists are kept commented out next to the ones in use.
# x=['MLH1','MSH2','MLH3','MSH6','PMS1','PMS2','APC']
x = ['korean red ginseng']
# y=['Hereditary non-polyposis colon cancer']
y = ['dermatitis']

overlap_df = overlap(x,y)
print(overlap_df.shape)

# Remove records found in the same publication: pub_check returns True for
# rows whose x and y PubMed IDs do not intersect, so pub_filter marks the
# rows to keep and its sum is the number of rows kept.
pub_filter=overlap_df.apply(pub_check,axis=1)
pub_removed = overlap_df.shape[0]-pub_filter.sum()
print(pub_removed,'records were found in the same publication')
# From here on overlap_df holds only the filtered rows.
overlap_df=overlap_df[pub_filter]
print(overlap_df.shape)
overlap_df

(568, 32)
1 records were found in the same publication
(567, 32)


Unnamed: 0,triple_x,subject_name_x,subject_type_x,subject_id_x,predicate_x,object_name_x,object_type_x,object_id_x,localCount_x,localTotal_x,...,object_type_y,object_id_y,localCount_y,localTotal_y,globalCount_y,globalTotal_y,odds_y,pval_y,pmids_y,set_y
0,Pesticide Residues:COEXISTS_WITH:Ginseng Prepa...,Pesticide Residues,hops,C0031251,COEXISTS_WITH,Ginseng Preparation,orch,C1119918,6,4955,...,dsyn,C0011615,5,24433,5,8040465,329.149337,1.000000e-10,30187459 29126802 28413317 24269244,dermatitis
1,Ginsenoside:COEXISTS_WITH:Ginseng Preparation,Ginsenoside,orch,C1165265,COEXISTS_WITH,Ginseng Preparation,orch,C1119918,5,4955,...,dsyn,C0011615,5,24433,5,8040465,329.149337,1.000000e-10,30187459 29126802 28413317 24269244,dermatitis
2,Pesticides:COEXISTS_WITH:Ginseng Preparation,Pesticides,hops,C0031253,COEXISTS_WITH,Ginseng Preparation,orch,C1119918,4,4955,...,dsyn,C0011615,5,24433,5,8040465,329.149337,1.000000e-10,30187459 29126802 28413317 24269244,dermatitis
3,RBL2 gene|RBL2:COEXISTS_WITH:Ginseng Preparation,RBL2 gene|RBL2,gngm,C1335602|5934,COEXISTS_WITH,Ginseng Preparation,orch,C1119918,4,4955,...,dsyn,C0011615,5,24433,5,8040465,329.149337,1.000000e-10,30187459 29126802 28413317 24269244,dermatitis
4,"NOS2A protein, human|NOS2:INHIBITS:Ginseng Pre...","NOS2A protein, human|NOS2",aapp,C0669372|4843,INHIBITS,Ginseng Preparation,orch,C1119918,2,4955,...,dsyn,C0011615,5,24433,5,8040465,329.149337,1.000000e-10,30187459 29126802 28413317 24269244,dermatitis
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
563,"Ginsenosides:AFFECTS:Diabetes Mellitus, Insuli...",Ginsenosides,orch,C0061278,AFFECTS,"Diabetes Mellitus, Insulin-Dependent",dsyn,C0011854,2,4955,...,dsyn,C0011615,4,24433,4,8040465,329.135904,5.800000e-09,26370535 11403811,dermatitis
564,"Ginsenosides:AFFECTS:Diabetes Mellitus, Insuli...",Ginsenosides,orch,C0061278,AFFECTS,"Diabetes Mellitus, Insulin-Dependent",dsyn,C0011854,2,4955,...,dsyn,C0011615,3,24433,3,8040465,329.122472,5.523000e-07,26253344 24053642,dermatitis
565,ursolic acid:INHIBITS:E-Selectin,ursolic acid,orch,C0077938,INHIBITS,E-Selectin,gngm,C0115305,2,4955,...,dsyn,C0011603,3,24433,3,8040465,329.122472,5.523000e-07,11355655 1708800 1706195,dermatitis
566,panaxadiol:INTERACTS_WITH:STAT3 gene|STAT3,panaxadiol,orch,C0069999,INTERACTS_WITH,STAT3 gene|STAT3,aapp,C1367307|6774,2,4955,...,dsyn,C0022398,4,24433,15,8040465,87.769454,3.148000e-07,21703716 21300911 18841165,dermatitis


Look at the overlapping data in more detail

In [None]:
# Count how often each x-side object name occurs within each object type.
overlap_counts = (
    overlap_df
    .groupby('object_type_x')['object_name_x']
    .value_counts()
)
overlap_counts

object_type_x  object_name_x                       
aapp           TNF protein, human|TNF                   14
               Ovalbumin                                 3
               NF-kappa B                                2
               Brain-Derived Neurotrophic Factor         1
               Interleukin-6                             1
               Nerve Growth Factors|NGF                  1
               STAT3 gene|STAT3                          1
               VEGF protein, human|VEGFA                 1
dsyn           Dermatitis, Atopic                      312
               Allergic rhinitis NOS                     8
               Arthritis                                 6
               Obesity                                   4
               Kidney Failure, Acute                     3
               Cardiovascular Diseases                   2
               Diabetes Mellitus, Insulin-Dependent      2
               Chronic Obstructive Airway Disease        1
    

### Publication data

We can retrieve the SemMedDB data for a given PubMed ID

In [None]:
# PubMed ID to look up, given as a string.
pmid = '19755659'
# Fetch the records for this PubMed ID; the last line displays them.
pub_df = sentence(pmid)
pub_df

Unnamed: 0,PREDICATION_ID,SENTENCE_ID,PMID,PREDICATE,SUBJECT_CUI,SUBJECT_NAME,SUBJECT_SEMTYPE,OBJECT_CUI,OBJECT_NAME,OBJECT_SEMTYPE,SUB_PRED_OBJ,TYPE,NUMBER,SENT_START_INDEX,SENTENCE,SENT_END_INDEX,ISSN,DP,EDAT,PYEAR
0,131291235,124272128,19755659,INTERACTS_WITH,324,APC,gngm,C0038792,Sulindac,orch,APC:INTERACTS_WITH:Sulindac,ti,1,21,Sulindac effects on inflammation and tumorigen...,126,1460-2180,2009 Nov,2009-9-17,2009
1,131291271,124272128,19755659,INTERACTS_WITH,C0879389|4292,MLH1 gene|MLH1,gngm,C0038792,Sulindac,orch,MLH1 gene|MLH1:INTERACTS_WITH:Sulindac,ti,1,21,Sulindac effects on inflammation and tumorigen...,126,1460-2180,2009 Nov,2009-9-17,2009
2,131291696,124272130,19755659,INTERACTS_WITH,324,APC,gngm,C0038792,Sulindac,orch,APC:INTERACTS_WITH:Sulindac,ab,2,367,To further explore intestinal regional respons...,802,1460-2180,2009 Nov,2009-9-17,2009
