# HPO ID Mapping

In [1]:
import requests
import pandas as pd
import json
from pprint import pprint

In [2]:
# import the list of HPO DOIs
hpo = pd.read_csv('HPO_DOI_citations.csv')

In [3]:
hpo.head()

Unnamed: 0,10.1186/s13326-017-0144-y
0,10.1080/03610926.2016.1197254
1,10.1016/j.jbi.2017.07.012
2,10.1007/s11427-017-9099-3
3,10.1007/s00103-017-2538-5
4,10.3389/FPUBH.2017.00066


## Method definitions

In [4]:
import xml.etree.ElementTree as ET

# check to see if object exists before returning it
def key_check(key, obj):
    if key in obj.keys():
        return obj[key]
    else:
        return None

def epmc_id_lookup(pubid):
    """
    make a request to europmc api for publication data based on identifer
    arg: ext_id = pmid, pmcid, doi
    """
    base_url = 'http://www.ebi.ac.uk/europepmc/webservices/rest/search'
    params = {
        'query': pubid,
        'resulttype': 'core',
        'format': 'json'
    }
    
    try:
        results = requests.get(url=base_url, params=params)
        print (results.url)
        resultsjson = results.json()
        allresults = resultsjson['resultList']['result']
        #       test if this is a DOI
        if("10." in pubid):
            """
            searching for PMIDs and PMCIDs gets you a single definitive result; for some reason
            ...searching by DOI gets several results, so iterate and find the one that matches the input ID  
            """
            for aresult in allresults:
                if (aresult['doi']) == pubid:
                    return aresult
                # else queried ID is not a DOI but a PMCID or PMID,
                # ergo the stanza we want is just at index 0
                else:
                    print ()
                    return allresults[0]
    except Exception as e:
        print(e)
        return None

def get_pub_yr_crossref(doi):
    """
    when the DOI has no corresponding pmid or pmcid, make a request to crossref api for pubyear
    """
    base_url = 'https://api.crossref.org/works/'
    url = base_url+doi
    print(url)
    try:
        results = requests.get(url).json()
        pubyr = None

        if "published-online" in results['message'].keys():
            pubyr = results['message']["published-online"]["date-parts"][0][0]
#             print("online pubyr: "+str(pubyr))
        else: 
            pubyr = results['message']["published-print"]["date-parts"][0][0]    
#             print("print pubyr: "+str(pubyr))
        return pubyr
    
    except Exception as e:
        print(e)
        return None

## Main

In [5]:
results = list()
for index, row in hpo.iterrows():
    row_dict = {
        'pmcid': None,
        'doi': row[0],
        'pmid': None,
        'pubyear': None,
    }
    if index < 500:
        # to search by other id use row_dict['pmid]
        pubid = row_dict['doi']
        response = epmc_id_lookup(pubid)
        
        if response!=None:
            row_dict['pmcid'] = key_check('pmcid',response)
            row_dict['pmid'] = key_check('id',response)
            row_dict['pubyear'] = key_check('pubYear',response)
        else:
#           check to see if it is a DOI
            if ("10." in pubid):
                try: 
                    row_dict['pubyear'] = get_pub_yr_crossref(pubid)
                except Exception as f:
                    print ("Failed at crossref: "+ f)
            else:
                print("Requested "+ pubid + " but this call can only be used with a DOI")
#       append the results, whether from epmc or crossref  
        pprint(row_dict)   
        results.append(row_dict)


http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1080%2F03610926.2016.1197254&resulttype=core&format=json
https://api.crossref.org/works/10.1080/03610926.2016.1197254
{'doi': '10.1080/03610926.2016.1197254',
 'pmcid': None,
 'pmid': None,
 'pubyear': 2016}
http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1016%2Fj.jbi.2017.07.012&resulttype=core&format=json
{'doi': '10.1016/j.jbi.2017.07.012',
 'pmcid': None,
 'pmid': '28729030',
 'pubyear': '2017'}
http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1007%2Fs11427-017-9099-3&resulttype=core&format=json
{'doi': '10.1007/s11427-017-9099-3',
 'pmcid': None,
 'pmid': '28639105',
 'pubyear': '2017'}
http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1007%2Fs00103-017-2538-5&resulttype=core&format=json
{'doi': '10.1007/s00103-017-2538-5',
 'pmcid': None,
 'pmid': '28293716',
 'pubyear': '2017'}
http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.3389%2FFPUBH.2017.00066&resultt

http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1186%2FS13326-016-0051-7&resulttype=core&format=json

{'doi': '10.1186/S13326-016-0051-7',
 'pmcid': 'PMC4804633',
 'pmid': '27011785',
 'pubyear': '2016'}
http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1109%2FBIBM.2015.7359789&resulttype=core&format=json
https://api.crossref.org/works/10.1109/BIBM.2015.7359789
{'doi': '10.1109/BIBM.2015.7359789',
 'pmcid': None,
 'pmid': None,
 'pubyear': 2015}
http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1038%2Fnrg3932&resulttype=core&format=json
{'doi': '10.1038/nrg3932', 'pmcid': None, 'pmid': '26553330', 'pubyear': '2015'}
http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1145%2F2808719.2812219&resulttype=core&format=json
https://api.crossref.org/works/10.1145/2808719.2812219
{'doi': '10.1145/2808719.2812219', 'pmcid': None, 'pmid': None, 'pubyear': 2015}
http://www.ebi.ac.uk/europepmc/webservices/rest/search?query=10.1038%2Fng.3425&re

In [6]:
newFrame = pd.DataFrame(data=results)
newFrame.to_csv('mapped_ids.csv')

In [7]:
newFrame

Unnamed: 0,doi,pmcid,pmid,pubyear
0,10.1080/03610926.2016.1197254,,,2016
1,10.1016/j.jbi.2017.07.012,,28729030,2017
2,10.1007/s11427-017-9099-3,,28639105,2017
3,10.1007/s00103-017-2538-5,,28293716,2017
4,10.3389/FPUBH.2017.00066,PMC5379772,28421178,2017
5,10.1186/s12864-016-3263-4,PMC5310285,28198675,2017
6,10.2144/000114492,,28118812,2017
7,10.1093/nar/gkw1039,PMC5210535,27899602,2017
8,10.1093/nar/gkw1128,PMC5210586,27899636,2017
9,10.5582/irdr.2017.01003,PMC5359347,28357175,2017
